From 37eb3b4ed07e12a6b5f849fd4e8c0247cbd59eed Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:37 -0400 Subject: [PATCH 01/37] client: pass a mask parameter to path_walk ll_walk expects to get a set of attributes out of a path_walk. Pass a caps mask parameter into path_walk, and then apply it when we reach the last component of the path. This may prevent us from having to further interact with the server after the pathwalk, in some cases. If we know that we're going to need certain caps to do the actual operation we can request them during the lookup and may have all that we need by the time we go to do the real request. Signed-off-by: Jeff Layton --- src/client/Client.cc | 27 ++++++++++++++++----------- src/client/Client.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 4af0fe0f8aa94..fff9963afdfbf 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6032,7 +6032,7 @@ int Client::get_or_create(Inode *dir, const char* name, } int Client::path_walk(const filepath& origpath, InodeRef *end, bool followsym, - int uid, int gid) + int mask, int uid, int gid) { filepath path = origpath; InodeRef cur; @@ -6050,10 +6050,10 @@ int Client::path_walk(const filepath& origpath, InodeRef *end, bool followsym, ldout(cct, 10) << "path_walk " << path << dendl; int symlinks = 0; - int caps = 0; unsigned i=0; while (i < path.depth() && cur) { + int caps = 0; const string &dname = path[i]; ldout(cct, 10) << " " << i << " " << *cur << " " << dname << dendl; ldout(cct, 20) << " (path is " << path << ")" << dendl; @@ -6064,6 +6064,11 @@ int Client::path_walk(const filepath& origpath, InodeRef *end, bool followsym, return r; caps = CEPH_CAP_AUTH_SHARED; } + + /* Get extra requested caps on the last component */ + if (i == (path.depth() - 1)) + caps |= mask; + int r = _lookup(cur.get(), dname, caps, &next, uid, gid); if (r < 0) return r; @@ -6629,7 +6634,7 @@ int Client::stat(const 
char *relpath, struct stat *stbuf, tout(cct) << relpath << std::endl; filepath path(relpath); InodeRef in; - int r = path_walk(path, &in); + int r = path_walk(path, &in, true, mask); if (r < 0) return r; r = _getattr(in, mask); @@ -6652,7 +6657,7 @@ int Client::lstat(const char *relpath, struct stat *stbuf, filepath path(relpath); InodeRef in; // don't follow symlinks - int r = path_walk(path, &in, false); + int r = path_walk(path, &in, false, mask); if (r < 0) return r; r = _getattr(in, mask); @@ -7554,7 +7559,7 @@ int Client::open(const char *relpath, int flags, mode_t mode, int stripe_unit, bool created = false; /* O_CREATE with O_EXCL enforces O_NOFOLLOW. */ bool followsym = !((flags & O_NOFOLLOW) || ((flags & O_CREAT) && (flags & O_EXCL))); - int r = path_walk(path, &in, followsym, uid, gid); + int r = path_walk(path, &in, followsym, ceph_caps_for_mode(mode), uid, gid); if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL)) return -EEXIST; @@ -7571,7 +7576,7 @@ int Client::open(const char *relpath, int flags, mode_t mode, int stripe_unit, string dname = dirpath.last_dentry(); dirpath.pop_dentry(); InodeRef dir; - r = path_walk(dirpath, &dir, true, uid, gid); + r = path_walk(dirpath, &dir, true, 0, uid, gid); if (r < 0) goto out; if (cct->_conf->client_permissions) { @@ -9553,7 +9558,7 @@ int Client::ll_walk(const char* name, Inode **out, struct stat *attr) tout(cct) << "ll_walk" << std::endl; tout(cct) << name << std::endl; - rc = path_walk(fp, &in, false); + rc = path_walk(fp, &in, false, CEPH_STAT_CAP_INODE_ALL); if (rc < 0) { attr->st_ino = 0; *out = NULL; @@ -9739,7 +9744,7 @@ int Client::getxattr(const char *path, const char *name, void *value, size_t siz { Mutex::Locker lock(client_lock); InodeRef in; - int r = Client::path_walk(path, &in, true); + int r = Client::path_walk(path, &in, true, CEPH_STAT_CAP_XATTR); if (r < 0) return r; return _getxattr(in, name, value, size); @@ -9749,7 +9754,7 @@ int Client::lgetxattr(const char *path, const char *name, void 
*value, size_t si { Mutex::Locker lock(client_lock); InodeRef in; - int r = Client::path_walk(path, &in, false); + int r = Client::path_walk(path, &in, false, CEPH_STAT_CAP_XATTR); if (r < 0) return r; return _getxattr(in, name, value, size); @@ -9768,7 +9773,7 @@ int Client::listxattr(const char *path, char *list, size_t size) { Mutex::Locker lock(client_lock); InodeRef in; - int r = Client::path_walk(path, &in, true); + int r = Client::path_walk(path, &in, true, CEPH_STAT_CAP_XATTR); if (r < 0) return r; return Client::_listxattr(in.get(), list, size); @@ -9778,7 +9783,7 @@ int Client::llistxattr(const char *path, char *list, size_t size) { Mutex::Locker lock(client_lock); InodeRef in; - int r = Client::path_walk(path, &in, false); + int r = Client::path_walk(path, &in, false, CEPH_STAT_CAP_XATTR); if (r < 0) return r; return Client::_listxattr(in.get(), list, size); diff --git a/src/client/Client.h b/src/client/Client.h index 06bdc08bef00f..b30ddee978939 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -519,7 +519,7 @@ class Client : public Dispatcher, public md_config_obs_t { // path traversal for high-level interface InodeRef cwd; int path_walk(const filepath& fp, InodeRef *end, bool followsym=true, - int uid=-1, int gid=-1); + int mask=0, int uid=-1, int gid=-1); int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0); int fill_stat(InodeRef& in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0) { return fill_stat(in.get(), st, dirstat, rstat); From d0fc2a852ad148130b4aa4000afac5674ba9d537 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Aug 2016 07:16:37 -0400 Subject: [PATCH 02/37] mds/mdstypes: add btime to inode Signed-off-by: Sage Weil --- src/mds/mdstypes.cc | 13 +++++++++++-- src/mds/mdstypes.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 1428af65f0f80..6432a2c0cf509 100644 --- a/src/mds/mdstypes.cc +++ 
b/src/mds/mdstypes.cc @@ -238,7 +238,7 @@ void inline_data_t::decode(bufferlist::iterator &p) */ void inode_t::encode(bufferlist &bl, uint64_t features) const { - ENCODE_START(13, 6, bl); + ENCODE_START(14, 6, bl); ::encode(ino, bl); ::encode(rdev, bl); @@ -285,12 +285,14 @@ void inode_t::encode(bufferlist &bl, uint64_t features) const ::encode(last_scrub_version, bl); ::encode(last_scrub_stamp, bl); + ::encode(btime, bl); + ENCODE_FINISH(bl); } void inode_t::decode(bufferlist::iterator &p) { - DECODE_START_LEGACY_COMPAT_LEN(13, 6, 6, p); + DECODE_START_LEGACY_COMPAT_LEN(14, 6, 6, p); ::decode(ino, p); ::decode(rdev, p); @@ -363,6 +365,11 @@ void inode_t::decode(bufferlist::iterator &p) ::decode(last_scrub_version, p); ::decode(last_scrub_stamp, p); } + if (struct_v >= 14) { + ::decode(btime, p); + } else { + btime = utime_t(); + } DECODE_FINISH(p); } @@ -372,6 +379,7 @@ void inode_t::dump(Formatter *f) const f->dump_unsigned("ino", ino); f->dump_unsigned("rdev", rdev); f->dump_stream("ctime") << ctime; + f->dump_stream("btime") << btime; f->dump_unsigned("mode", mode); f->dump_unsigned("uid", uid); f->dump_unsigned("gid", gid); @@ -443,6 +451,7 @@ int inode_t::compare(const inode_t &other, bool *divergent) const if (version == other.version) { if (rdev != other.rdev || ctime != other.ctime || + btime != other.btime || mode != other.mode || uid != other.uid || gid != other.gid || diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 7039e6c05c08f..a358d354f2ed6 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -470,6 +470,7 @@ struct inode_t { // affected by any inode change... 
utime_t ctime; // inode change time + utime_t btime; // birth time // perm (namespace permissions) uint32_t mode; From 9591c51c90cab24f18eabd83d9972350ba5ce450 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Aug 2016 07:16:37 -0400 Subject: [PATCH 03/37] mds/Server: set btime=ctime on creation Signed-off-by: Sage Weil --- src/mds/Server.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 40d9fd8db3f7c..5cf18b1122bc1 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2354,7 +2354,8 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino in->inode.uid = mdr->client_request->get_caller_uid(); - in->inode.ctime = in->inode.mtime = in->inode.atime = mdr->get_op_stamp(); + in->inode.btime = in->inode.ctime = in->inode.mtime = in->inode.atime = + mdr->get_op_stamp(); MClientRequest *req = mdr->client_request; if (req->get_data().length()) { From 9cbfc47b47efef05eb9f59f1198c4b0636794422 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 04/37] mds/MDCache: set btime on system inodes Signed-off-by: Sage Weil --- src/mds/MDCache.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index c5b0ebae13cc3..3a1f5199fe270 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -355,7 +355,8 @@ void MDCache::create_unlinked_system_inode(CInode *in, inodeno_t ino, in->inode.mode = 0500 | mode; in->inode.size = 0; in->inode.ctime = - in->inode.mtime = ceph_clock_now(g_ceph_context); + in->inode.mtime = + in->inode.btime = ceph_clock_now(g_ceph_context); in->inode.nlink = 1; in->inode.truncate_size = -1ull; From e292d2bef59e912ac9c4fdf9572bf79b291341ca Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 05/37] include/ceph_features: add FS_BTIME features Signed-off-by: Sage Weil --- src/include/ceph_features.h | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 710e9a4704a94..27f0890a89c74 100755 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -84,6 +84,7 @@ // duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5 #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ #define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ +#define CEPH_FEATURE_FS_BTIME (1ULL<<59) /* btime */ #define CEPH_FEATURE_RESERVED2 (1ULL<<61) /* slow down, we are almost out... */ #define CEPH_FEATURE_RESERVED (1ULL<<62) /* DO NOT USE THIS ... last bit! */ @@ -180,6 +181,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) { CEPH_FEATURE_SERVER_JEWEL | \ CEPH_FEATURE_FS_FILE_LAYOUT_V2 | \ CEPH_FEATURE_SERVER_KRAKEN | \ + CEPH_FEATURE_FS_BTIME | \ 0ULL) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL From 4349879a69462d2bd37cd5f3883b6e9f242db844 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 06/37] mds: pass btime in InodeStat MClientReply, if feature is present Signed-off-by: Sage Weil --- src/mds/CInode.cc | 3 +++ src/messages/MClientReply.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 457395e5ebe05..11f50fd97d469 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -3357,6 +3357,9 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, if (session->connection->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) { ::encode(layout.pool_ns, bl); } + if (session->connection->has_feature(CEPH_FEATURE_FS_BTIME)) { + ::encode(any_i->btime, bl); + } return valid; } diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index 44ceae2bf472d..21bf239ccdee1 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -185,6 +185,10 @@ struct InodeStat { if ((features & 
CEPH_FEATURE_FS_FILE_LAYOUT_V2)) ::decode(layout.pool_ns, p); + if ((features & CEPH_FEATURE_FS_BTIME)) + ::decode(btime, p); + else + btime = utime_t(); } // see CInode::encode_inodestat for encoder. From e97e74a4931e1b4de97193c2a42d61a683a67c73 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 07/37] client: keep btime in Inode and InodeStat Signed-off-by: Sage Weil Signed-off-by: Jeff Layton --- src/client/Client.cc | 2 ++ src/client/Inode.cc | 1 + src/client/Inode.h | 1 + src/messages/MClientReply.h | 2 +- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index fff9963afdfbf..47eef726e5497 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -869,6 +869,7 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from, in->mode = st->mode; in->uid = st->uid; in->gid = st->gid; + in->btime = st->btime; } if ((issued & CEPH_CAP_LINK_EXCL) == 0) { @@ -9495,6 +9496,7 @@ Inode *Client::open_snapdir(Inode *diri) in->gid = diri->gid; in->mtime = diri->mtime; in->ctime = diri->ctime; + in->btime = diri->btime; in->size = diri->size; in->dirfragtree.clear(); diff --git a/src/client/Inode.cc b/src/client/Inode.cc index 454ae9d2c4210..08d6ab31e4440 100644 --- a/src/client/Inode.cc +++ b/src/client/Inode.cc @@ -346,6 +346,7 @@ void Inode::dump(Formatter *f) const if (rdev) f->dump_unsigned("rdev", rdev); f->dump_stream("ctime") << ctime; + f->dump_stream("btime") << btime; f->dump_stream("mode") << '0' << std::oct << mode << std::dec; f->dump_unsigned("uid", uid); f->dump_unsigned("gid", gid); diff --git a/src/client/Inode.h b/src/client/Inode.h index b7ed726a2df1b..d17475504aed2 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -92,6 +92,7 @@ struct Inode { // affected by any inode change... 
utime_t ctime; // inode change time + utime_t btime; // birth time // perm (namespace permissions) uint32_t mode; diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index 21bf239ccdee1..a997448197533 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -102,7 +102,7 @@ struct InodeStat { version_t xattr_version; ceph_mds_reply_cap cap; file_layout_t layout; - utime_t ctime, mtime, atime; + utime_t ctime, btime, mtime, atime; uint32_t time_warp_seq; uint64_t size, max_size; uint64_t truncate_size; From 34c6d592a0d2325d46a2829effdad6f7429a656e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 08/37] ceph: break up ll_getattr into two functions ...in preparation for adding a ll_getattrx. Signed-off-by: Jeff Layton --- src/client/Client.cc | 19 ++++++++++++------- src/client/Client.h | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 47eef726e5497..f8cdce0e87039 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -9679,24 +9679,29 @@ Inode *Client::ll_get_inode(vinodeno_t vino) return in; } -int Client::ll_getattr(Inode *in, struct stat *attr, int uid, int gid) +int Client::_ll_getattr(Inode *in, int uid, int gid) { - Mutex::Locker lock(client_lock); - vinodeno_t vino = _get_vino(in); ldout(cct, 3) << "ll_getattr " << vino << dendl; tout(cct) << "ll_getattr" << std::endl; tout(cct) << vino.ino.val << std::endl; - int res; if (vino.snapid < CEPH_NOSNAP) - res = 0; + return 0; else - res = _getattr(in, CEPH_STAT_CAP_INODE_ALL, uid, gid); + return _getattr(in, CEPH_STAT_CAP_INODE_ALL, uid, gid); +} + +int Client::ll_getattr(Inode *in, struct stat *attr, int uid, int gid) +{ + Mutex::Locker lock(client_lock); + + int res = _ll_getattr(in, uid, gid); + if (res == 0) fill_stat(in, attr); - ldout(cct, 3) << "ll_getattr " << vino << " = " << res << dendl; + ldout(cct, 3) << "ll_getattr " << _get_vino(in) << 
" = " << res << dendl; return res; } diff --git a/src/client/Client.h b/src/client/Client.h index b30ddee978939..1a7cefd2257e5 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -908,6 +908,8 @@ class Client : public Dispatcher, public md_config_obs_t { mds_rank_t _get_random_up_mds() const; + int _ll_getattr(Inode *in, int uid, int gid); + public: int mount(const std::string &mount_root, bool require_mds=false); void unmount(); From 2115de04a417e6df4272fc836829b70bd6a2b97e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 09/37] client: move the device bitshift handling macros to Client.h We'll need them in Client.cc now in addition to FUSE specific code. Signed-off-by: Jeff Layton --- src/client/Client.h | 8 ++++++++ src/client/fuse_ll.cc | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/client/Client.h b/src/client/Client.h index 1a7cefd2257e5..5b0b7ee0a4c3a 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -93,6 +93,14 @@ struct CommandOp std::string *outs; }; +/* Device bit shift handling */ +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + /* error code for ceph_fuse */ #define CEPH_FUSE_NO_MDS_UP -(1<<2) /* no mds up deteced in ceph_fuse */ diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index de14029943d1b..b30e2e0ba44bf 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -41,13 +41,6 @@ #define FINO_STAG(x) ((x) >> 48) #define MAKE_FINO(i,s) ((i) | ((s) << 48)) -#define MINORBITS 20 -#define MINORMASK ((1U << MINORBITS) - 1) - -#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) -#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) -#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) - static uint32_t new_encode_dev(dev_t dev) { unsigned major = 
MAJOR(dev); From f7c885e1f9cefc359f8b6fe7e51e212387432529 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:38 -0400 Subject: [PATCH 10/37] libcephfs: add a ceph_ll_getattrx and ceph_statx New interfaces for fetching extended (and selective) stat information. Additionally, applications can specify AT_NO_ATTR_SYNC in the flags to indicate that they want to do a "lazy" statx that just hands out the inode info from the cache, or AT_SYMLINK_NOFOLLOW to avoid following symlinks when walking the path. Signed-off-by: Jeff Layton --- ceph.spec.in | 1 + debian/libcephfs-dev.install | 1 + src/client/Client.cc | 137 ++++++++++++++++++++++++++++++++ src/client/Client.h | 10 +++ src/include/cephfs/ceph_statx.h | 85 ++++++++++++++++++++ src/include/cephfs/libcephfs.h | 18 +++++ src/libcephfs.cc | 16 ++++ src/test/libcephfs/test.cc | 35 ++++++++ 8 files changed, 303 insertions(+) create mode 100644 src/include/cephfs/ceph_statx.h diff --git a/ceph.spec.in b/ceph.spec.in index 6fd2cb3f97742..d52e2ab2b6503 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1371,6 +1371,7 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1 %defattr(-,root,root,-) %dir %{_includedir}/cephfs %{_includedir}/cephfs/libcephfs.h +%{_includedir}/cephfs/ceph_statx.h %{_libdir}/libcephfs.so %files -n python-cephfs diff --git a/debian/libcephfs-dev.install b/debian/libcephfs-dev.install index 625c874ec146b..d2d648719fdbe 100644 --- a/debian/libcephfs-dev.install +++ b/debian/libcephfs-dev.install @@ -1,2 +1,3 @@ usr/include/cephfs/libcephfs.h +usr/include/cephfs/ceph_statx.h usr/lib/libcephfs.so diff --git a/src/client/Client.cc b/src/client/Client.cc index f8cdce0e87039..6234ea4b26b74 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -94,6 +94,8 @@ #include "include/assert.h" #include "include/stat.h" +#include "include/cephfs/ceph_statx.h" + #if HAVE_GETGROUPLIST #include #include @@ -6648,6 +6650,56 @@ int Client::stat(const char *relpath, struct stat *stbuf, 
return r; } +unsigned Client::statx_to_mask(unsigned int flags, unsigned int want) +{ + unsigned mask = 0; + + /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */ + if (flags & AT_NO_ATTR_SYNC) + goto out; + + /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */ + mask |= CEPH_CAP_PIN; + if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_RDEV|CEPH_STATX_BTIME)) + mask |= CEPH_CAP_AUTH_SHARED; + if (want & CEPH_STATX_NLINK) + mask |= CEPH_CAP_LINK_SHARED; + if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS)) + mask |= CEPH_CAP_FILE_SHARED; + +out: + return mask; +} + +int Client::statx(const char *relpath, struct ceph_statx *stx, + unsigned int want, unsigned int flags) +{ + ldout(cct, 3) << "statx enter (relpath " << relpath << " want " << want << ")" << dendl; + Mutex::Locker lock(client_lock); + tout(cct) << "statx" << std::endl; + tout(cct) << relpath << std::endl; + filepath path(relpath); + InodeRef in; + + unsigned mask = statx_to_mask(flags, want); + + int r = path_walk(path, &in, flags & AT_SYMLINK_NOFOLLOW, mask); + if (r < 0) + return r; + + if (mask && !in->caps_issued_mask(mask)) { + r = _getattr(in, mask); + if (r < 0) { + ldout(cct, 3) << "statx exit on error!" 
<< dendl; + return r; + } + } + + fill_statx(in, mask, stx); + ldout(cct, 3) << "statx exit (relpath " << relpath << " mask " << stx->stx_mask << ")" << dendl; + return r; +} + int Client::lstat(const char *relpath, struct stat *stbuf, frag_info_t *dirstat, int mask) { @@ -6718,6 +6770,74 @@ int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_inf return in->caps_issued(); } +void Client::fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx) +{ + ldout(cct, 10) << "fill_statx on " << in->ino << " snap/dev" << in->snapid + << " mode 0" << oct << in->mode << dec + << " mtime " << in->mtime << " ctime " << in->ctime << dendl; + memset(stx, 0, sizeof(struct ceph_statx)); + + /* + * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask + * so that all bits are set. + */ + if (!mask) + mask = ~0; + + /* These are always considered to be available */ + stx->stx_dev_major = in->snapid >> 32; + stx->stx_dev_minor = (uint32_t)in->snapid; + stx->stx_blksize = MAX(in->layout.stripe_unit, 4096); + + if (use_faked_inos()) + stx->stx_ino = in->faked_ino; + else + stx->stx_ino = in->ino; + stx->stx_rdev_minor = MINOR(in->rdev); + stx->stx_rdev_major = MAJOR(in->rdev); + stx->stx_mask |= (CEPH_STATX_INO|CEPH_STATX_RDEV); + + if (mask & CEPH_CAP_AUTH_SHARED) { + stx->stx_uid = in->uid; + stx->stx_gid = in->gid; + stx->stx_mode = in->mode; + stx->stx_btime = in->btime.sec(); + stx->stx_btime_ns = in->btime.nsec(); + stx->stx_mask |= (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME); + } + + if (mask & CEPH_CAP_LINK_SHARED) { + stx->stx_nlink = in->nlink; + stx->stx_mask |= CEPH_STATX_NLINK; + } + + if (mask & CEPH_CAP_FILE_SHARED) { + if (in->ctime > in->mtime) { + stx->stx_ctime = in->ctime.sec(); + stx->stx_ctime_ns = in->ctime.nsec(); + } else { + stx->stx_ctime = in->mtime.sec(); + stx->stx_ctime_ns = in->mtime.nsec(); + } + stx->stx_atime = in->atime.sec(); + stx->stx_atime_ns = in->atime.nsec(); + stx->stx_mtime = 
in->mtime.sec(); + stx->stx_mtime_ns = in->mtime.nsec(); + + if (in->is_dir()) { + if (cct->_conf->client_dirsize_rbytes) + stx->stx_size = in->rstat.rbytes; + else + stx->stx_size = in->dirstat.size(); + stx->stx_blocks = 1; + } else { + stx->stx_size = in->size; + stx->stx_blocks = (in->size + 511) >> 9; + } + stx->stx_mask |= (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS); + } +} + void Client::touch_dn(Dentry *dn) { lru.lru_touch(dn); @@ -9705,6 +9825,23 @@ int Client::ll_getattr(Inode *in, struct stat *attr, int uid, int gid) return res; } +int Client::ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want, + unsigned int flags, int uid, int gid) +{ + Mutex::Locker lock(client_lock); + + int res = 0; + unsigned mask = statx_to_mask(flags, want); + + if (mask && !in->caps_issued_mask(mask)) + res = _ll_getattr(in, uid, gid); + + if (res == 0) + fill_statx(in, mask, stx); + ldout(cct, 3) << "ll_getattrx " << _get_vino(in) << " = " << res << dendl; + return res; +} + int Client::ll_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid) { diff --git a/src/client/Client.h b/src/client/Client.h index 5b0b7ee0a4c3a..520f000a3e095 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -532,6 +532,12 @@ class Client : public Dispatcher, public md_config_obs_t { int fill_stat(InodeRef& in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0) { return fill_stat(in.get(), st, dirstat, rstat); } + + void fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx); + void fill_statx(InodeRef& in, unsigned int mask, struct ceph_statx *stx) { + return fill_statx(in.get(), mask, stx); + } + void touch_dn(Dentry *dn); // trim cache. 
@@ -987,7 +993,9 @@ class Client : public Dispatcher, public md_config_obs_t { int symlink(const char *existing, const char *newname); // inode stuff + unsigned statx_to_mask(unsigned int flags, unsigned int want); int stat(const char *path, struct stat *stbuf, frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL); + int statx(const char *path, struct ceph_statx *stx, unsigned int want, unsigned int flags); int lstat(const char *path, struct stat *stbuf, frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL); int lstatlite(const char *path, struct statlite *buf); @@ -1087,6 +1095,8 @@ class Client : public Dispatcher, public md_config_obs_t { bool ll_forget(Inode *in, int count); bool ll_put(Inode *in); int ll_getattr(Inode *in, struct stat *st, int uid = -1, int gid = -1); + int ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want, + unsigned int flags, int uid = -1, int gid = -1); int ll_setattr(Inode *in, struct stat *st, int mask, int uid = -1, int gid = -1); int ll_getxattr(Inode *in, const char *name, void *value, size_t size, diff --git a/src/include/cephfs/ceph_statx.h b/src/include/cephfs/ceph_statx.h new file mode 100644 index 0000000000000..48ce0f9154a74 --- /dev/null +++ b/src/include/cephfs/ceph_statx.h @@ -0,0 +1,85 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * scalable distributed file system + * + * Copyright (C) Jeff Layton + * + * Heavily borrowed from David Howells' draft statx patchset. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_CEPH_STATX_H +#define CEPH_CEPH_STATX_H +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Since the xstat patches are still a work in progress, we borrow its data + * structures and #defines to implement ceph_getattrx. 
Once the xstat stuff + * has been merged we should drop this and switch over to using that instead. + */ +struct ceph_statx { + uint32_t stx_mask; + uint32_t stx_information; + uint32_t stx_blksize; + uint32_t stx_nlink; + uint32_t stx_gen; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_version; + int64_t stx_atime; + int64_t stx_btime; + int64_t stx_ctime; + int64_t stx_mtime; + int32_t stx_atime_ns; + int32_t stx_btime_ns; + int32_t stx_ctime_ns; + int32_t stx_mtime_ns; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t __spare1[16]; +}; + +#define CEPH_STATX_MODE 0x00000001U /* Want/got stx_mode */ +#define CEPH_STATX_NLINK 0x00000002U /* Want/got stx_nlink */ +#define CEPH_STATX_UID 0x00000004U /* Want/got stx_uid */ +#define CEPH_STATX_GID 0x00000008U /* Want/got stx_gid */ +#define CEPH_STATX_RDEV 0x00000010U /* Want/got stx_rdev */ +#define CEPH_STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define CEPH_STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define CEPH_STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define CEPH_STATX_INO 0x00000100U /* Want/got stx_ino */ +#define CEPH_STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define CEPH_STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define CEPH_STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define CEPH_STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define CEPH_STATX_GEN 0x00002000U /* Want/got stx_gen */ +#define CEPH_STATX_ALL_STATS 0x00003fffU /* All supported stats */ + +/* statx request flags. 
Callers can set these in the "flags" field */ +#ifndef AT_NO_ATTR_SYNC +#define AT_NO_ATTR_SYNC 0x4000 /* Don't sync attributes with the server */ +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* CEPH_CEPH_STATX_H */ + diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h index 491fd658ba63d..c550bcd7b02d2 100644 --- a/src/include/cephfs/libcephfs.h +++ b/src/include/cephfs/libcephfs.h @@ -27,6 +27,8 @@ #include #include +#include "ceph_statx.h" + #ifdef __cplusplus extern "C" { #endif @@ -618,6 +620,19 @@ int ceph_rename(struct ceph_mount_info *cmount, const char *from, const char *to */ int ceph_stat(struct ceph_mount_info *cmount, const char *path, struct stat *stbuf); +/** + * Get a file's extended statistics and attributes. + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param path the file or directory to get the statistics of. + * @param stx the ceph_statx struct that will be filled in with the file's statistics. + * @param want bitfield of CEPH_STATX_* flags showing desired attributes + * @param flags bitfield that can be used to set AT_* modifier flags (only AT_NO_ATTR_SYNC and AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_statx(struct ceph_mount_info *cmount, const char *path, struct ceph_statx *stx, + unsigned int want, unsigned int flags); + /** * Get a file's statistics and attributes, without following symlinks. 
* @@ -1393,6 +1408,9 @@ int ceph_ll_walk(struct ceph_mount_info *cmount, const char *name, struct stat *attr); int ceph_ll_getattr(struct ceph_mount_info *cmount, struct Inode *in, struct stat *attr, int uid, int gid); +int ceph_ll_getattrx(struct ceph_mount_info *cmount, struct Inode *in, + struct ceph_statx *stx, unsigned int want, unsigned int flags, + int uid, int gid); int ceph_ll_setattr(struct ceph_mount_info *cmount, struct Inode *in, struct stat *st, int mask, int uid, int gid); int ceph_ll_open(struct ceph_mount_info *cmount, struct Inode *in, int flags, diff --git a/src/libcephfs.cc b/src/libcephfs.cc index 75653610fdea4..c01255e59063b 100644 --- a/src/libcephfs.cc +++ b/src/libcephfs.cc @@ -610,6 +610,14 @@ extern "C" int ceph_stat(struct ceph_mount_info *cmount, const char *path, return cmount->get_client()->stat(path, stbuf); } +extern "C" int ceph_statx(struct ceph_mount_info *cmount, const char *path, + struct ceph_statx *stx, unsigned int want, unsigned int flags) +{ + if (!cmount->is_mounted()) + return -ENOTCONN; + return cmount->get_client()->statx(path, stx, want, flags); +} + extern "C" int ceph_lstat(struct ceph_mount_info *cmount, const char *path, struct stat *stbuf) { @@ -1406,6 +1414,14 @@ extern "C" int ceph_ll_getattr(class ceph_mount_info *cmount, return (cmount->get_client()->ll_getattr(in, attr, uid, gid)); } +extern "C" int ceph_ll_getattrx(class ceph_mount_info *cmount, + Inode *in, struct ceph_statx *stx, + unsigned int want, unsigned int flags, + int uid, int gid) +{ + return (cmount->get_client()->ll_getattrx(in, stx, want, flags, uid, gid)); +} + extern "C" int ceph_ll_setattr(class ceph_mount_info *cmount, Inode *in, struct stat *st, int mask, int uid, int gid) diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index 62dfd785104c0..eeb54c6c5d8bf 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -1467,3 +1467,38 @@ TEST(LibCephFS, SlashDotDot) { ceph_shutdown(cmount); } + 
+TEST(LibCephFS, Btime) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(ceph_mount(cmount, "/"), 0); + + char filename[32]; + sprintf(filename, "/getattrx%x", getpid()); + + ceph_unlink(cmount, filename); + int fd = ceph_open(cmount, filename, O_RDWR|O_CREAT|O_EXCL, 0666); + ASSERT_LT(0, fd); + + struct ceph_statx stx; + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_CTIME|CEPH_STATX_BTIME, 0), 0); + ASSERT_TRUE(stx.stx_mask & (CEPH_STATX_CTIME|CEPH_STATX_BTIME)); + ASSERT_EQ(stx.stx_btime, stx.stx_ctime); + ASSERT_EQ(stx.stx_btime_ns, stx.stx_ctime_ns); + + int64_t old_btime = stx.stx_btime; + int32_t old_btime_ns = stx.stx_btime_ns; + + /* Now sleep, do a chmod and verify that the ctime changed, but btime didn't */ + sleep(1); + ASSERT_EQ(ceph_chmod(cmount, filename, 0644), 0); + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_BTIME, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_BTIME); + ASSERT_EQ(stx.stx_btime, old_btime); + ASSERT_EQ(stx.stx_btime_ns, old_btime_ns); + ASSERT_FALSE(old_btime == stx.stx_ctime && old_btime_ns == stx.stx_ctime_ns); + + ceph_shutdown(cmount); +} From 639b482903cc023cb8c82916b892f76774136f16 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 11/37] libcephfs: add a test for "lazy" statx Create 2 clients. Create a file in client1, and do a lookup of it in client2, and then ll_getattrx it from client2. chmod the file from client1, ll_getattrx it from client2 (this time with AT_NO_ATTR_SYNC) and ensure that the ctime change is not seen. 
Signed-off-by: Jeff Layton --- src/test/libcephfs/test.cc | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index eeb54c6c5d8bf..16fde06889bb5 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -14,6 +14,7 @@ #include "gtest/gtest.h" #include "include/cephfs/libcephfs.h" +#include "include/stat.h" #include #include #include @@ -1502,3 +1503,51 @@ TEST(LibCephFS, Btime) { ceph_shutdown(cmount); } + +TEST(LibCephFS, LazyStatx) { + struct ceph_mount_info *cmount1, *cmount2; + ASSERT_EQ(ceph_create(&cmount1, NULL), 0); + ASSERT_EQ(ceph_create(&cmount2, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount1, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount2, NULL), 0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount1, NULL)); + ASSERT_EQ(0, ceph_conf_parse_env(cmount2, NULL)); + ASSERT_EQ(ceph_mount(cmount1, "/"), 0); + ASSERT_EQ(ceph_mount(cmount2, "/"), 0); + + char filename[32]; + sprintf(filename, "lazystatx%x", getpid()); + + Inode *root1, *file1, *root2, *file2; + struct stat st; + Fh *fh; + + ASSERT_EQ(ceph_ll_lookup_root(cmount1, &root1), 0); + ceph_ll_unlink(cmount1, root1, filename, getuid(), getgid()); + ASSERT_EQ(ceph_ll_create(cmount1, root1, filename, 0666, O_RDWR|O_CREAT|O_EXCL, + &st, &file1, &fh, getuid(), getgid()), 0); + + + ASSERT_EQ(ceph_ll_lookup_root(cmount2, &root2), 0); + ASSERT_EQ(ceph_ll_lookup(cmount2, root2, filename, &st, &file2, getuid(), getgid()), 0); + + int64_t old_ctime = stat_get_ctime_sec(&st); + int32_t old_ctime_ns = stat_get_ctime_nsec(&st); + + /* + * Now sleep, do a chmod on the first client and the see whether we get a + * different ctime with a statx that uses AT_NO_ATTR_SYNC + */ + sleep(1); + st.st_mode = 0644; + ASSERT_EQ(ceph_ll_setattr(cmount1, file1, &st, CEPH_SETATTR_MODE, getuid(), getgid()), 0); + + struct ceph_statx stx; + ASSERT_EQ(ceph_ll_getattrx(cmount2, file2, &stx, CEPH_STATX_CTIME, AT_NO_ATTR_SYNC, 
getuid(), getgid()), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_CTIME); + ASSERT_EQ(stx.stx_ctime, old_ctime); + ASSERT_EQ(stx.stx_ctime_ns, old_ctime_ns); + + ceph_shutdown(cmount1); + ceph_shutdown(cmount2); +} From d0889aa5faf910955fcb769b2b17132dcc389bf3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 12/37] mds/client: add btime to CapSnap and MClientCaps Currently we don't have a mechanism to set the btime, but we will need that eventually. If we want to allow the client to cache that change, we need to be able to pass it back and forth between client and server. Signed-off-by: Jeff Layton --- src/client/Client.cc | 5 +++++ src/client/Inode.h | 2 +- src/mds/Locker.cc | 6 ++++++ src/messages/MClientCaps.h | 9 +++++++-- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 6234ea4b26b74..e0726aa7339d0 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -3235,6 +3235,7 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, m->mtime = in->mtime; m->atime = in->atime; m->ctime = in->ctime; + m->btime = in->btime; m->time_warp_seq = in->time_warp_seq; if (flush & CEPH_CAP_FILE_WR) { @@ -3421,6 +3422,7 @@ void Client::queue_cap_snap(Inode *in, SnapContext& old_snapc) capsnap->uid = in->uid; capsnap->gid = in->gid; capsnap->mode = in->mode; + capsnap->btime = in->btime; capsnap->xattrs = in->xattrs; capsnap->xattr_version = in->xattr_version; @@ -3519,6 +3521,7 @@ void Client::flush_snaps(Inode *in, bool all_again) m->head.uid = capsnap->uid; m->head.gid = capsnap->gid; m->head.mode = capsnap->mode; + m->btime = capsnap->btime; m->size = capsnap->size; @@ -3526,6 +3529,7 @@ void Client::flush_snaps(Inode *in, bool all_again) ::encode(capsnap->xattrs, m->xattrbl); m->ctime = capsnap->ctime; + m->btime = capsnap->btime; m->mtime = capsnap->mtime; m->atime = capsnap->atime; m->time_warp_seq = capsnap->time_warp_seq; @@ -4869,6 +4873,7 @@ 
void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient in->mode = m->head.mode; in->uid = m->head.uid; in->gid = m->head.gid; + in->btime = m->btime; } bool deleted_inode = false; if ((issued & CEPH_CAP_LINK_EXCL) == 0) { diff --git a/src/client/Inode.h b/src/client/Inode.h index d17475504aed2..28fc439a429f8 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -51,7 +51,7 @@ struct CapSnap { int issued, dirty; uint64_t size; - utime_t ctime, mtime, atime; + utime_t ctime, btime, mtime, atime; version_t time_warp_seq; uint32_t mode; uid_t uid; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 966a5f2c12fab..3ca5efae1f073 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -3039,6 +3039,12 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t * << " for " << *in << dendl; pi->mode = m->head.mode; } + if (m->get_btime() != pi->btime) { + dout(7) << " btime " << oct << pi->btime + << " -> " << m->get_btime() << dec + << " for " << *in << dendl; + pi->btime = m->get_btime(); + } } } diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h index 2fabbc7dea477..a210e44284ff0 100644 --- a/src/messages/MClientCaps.h +++ b/src/messages/MClientCaps.h @@ -20,7 +20,7 @@ class MClientCaps : public Message { - static const int HEAD_VERSION = 8; + static const int HEAD_VERSION = 9; static const int COMPAT_VERSION = 1; public: @@ -28,7 +28,7 @@ class MClientCaps : public Message { uint64_t size, max_size, truncate_size; uint32_t truncate_seq; - utime_t mtime, atime, ctime; + utime_t mtime, atime, ctime, btime; file_layout_t layout; uint32_t time_warp_seq; @@ -62,6 +62,7 @@ class MClientCaps : public Message { __u32 get_truncate_seq() { return truncate_seq; } uint64_t get_truncate_size() { return truncate_size; } utime_t get_ctime() { return ctime; } + utime_t get_btime() { return btime; } utime_t get_mtime() { return mtime; } utime_t get_atime() { return atime; } __u32 get_time_warp_seq() { return 
time_warp_seq; } @@ -255,6 +256,9 @@ class MClientCaps : public Message { if (header.version >= 8) { ::decode(layout.pool_ns, p); } + if (header.version >= 9) { + ::decode(btime, p); + } } void encode_payload(uint64_t features) { header.version = HEAD_VERSION; @@ -311,6 +315,7 @@ class MClientCaps : public Message { ::encode(caller_gid, payload); ::encode(layout.pool_ns, payload); + ::encode(btime, payload); } }; From 6615553e8e8802a754901ddbe473274dc31f4351 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 13/37] mds/mdstypes: add change attribute to inode Signed-off-by: Jeff Layton --- src/mds/mdstypes.cc | 5 +++++ src/mds/mdstypes.h | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 6432a2c0cf509..20aa36fa156f8 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -286,6 +286,7 @@ void inode_t::encode(bufferlist &bl, uint64_t features) const ::encode(last_scrub_stamp, bl); ::encode(btime, bl); + ::encode(change_attr, bl); ENCODE_FINISH(bl); } @@ -367,8 +368,10 @@ void inode_t::decode(bufferlist::iterator &p) } if (struct_v >= 14) { ::decode(btime, p); + ::decode(change_attr, p); } else { btime = utime_t(); + change_attr = 0; } DECODE_FINISH(p); @@ -406,6 +409,7 @@ void inode_t::dump(Formatter *f) const f->dump_stream("mtime") << mtime; f->dump_stream("atime") << atime; f->dump_unsigned("time_warp_seq", time_warp_seq); + f->dump_unsigned("change_attr", change_attr); f->open_array_section("client_ranges"); for (map::const_iterator p = client_ranges.begin(); p != client_ranges.end(); ++p) { @@ -465,6 +469,7 @@ int inode_t::compare(const inode_t &other, bool *divergent) const truncate_size != other.truncate_size || truncate_from != other.truncate_from || truncate_pending != other.truncate_pending || + change_attr != other.change_attr || mtime != other.mtime || atime != other.atime || time_warp_seq != other.time_warp_seq || diff --git 
a/src/mds/mdstypes.h b/src/mds/mdstypes.h index a358d354f2ed6..c7dd37b6b8fff 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -494,6 +494,9 @@ struct inode_t { uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) inline_data_t inline_data; + // change attribute + uint64_t change_attr; + std::map client_ranges; // client(s) can write to these ranges // dirfrag, recursive accountin @@ -522,7 +525,7 @@ struct inode_t { size(0), max_size_ever(0), truncate_seq(0), truncate_size(0), truncate_from(0), truncate_pending(0), - time_warp_seq(0), + time_warp_seq(0), change_attr(0), version(0), file_data_version(0), xattr_version(0), last_scrub_version(0), backtrace_version(0) { clear_layout(); From d7e185c6a91b44ac6bb1284a5184ceb0ead4c4d3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 14/37] mds/Server: set change_attr to 0 on creation Signed-off-by: Jeff Layton --- src/mds/Server.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5cf18b1122bc1..e9ddbf3a4431b 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2357,6 +2357,8 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino in->inode.btime = in->inode.ctime = in->inode.mtime = in->inode.atime = mdr->get_op_stamp(); + in->inode.change_attr = 0; + MClientRequest *req = mdr->client_request; if (req->get_data().length()) { bufferlist::iterator p = req->get_data().begin(); From 90cbea83e8a2389e4616b4a68f04b6e8b5b170a9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 15/37] mds/MDCache: set change_attr to 0 on system_inodes Signed-off-by: Jeff Layton --- src/mds/MDCache.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 3a1f5199fe270..c95dacb8d5a0c 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -359,6 +359,7 @@ void 
MDCache::create_unlinked_system_inode(CInode *in, inodeno_t ino, in->inode.btime = ceph_clock_now(g_ceph_context); in->inode.nlink = 1; in->inode.truncate_size = -1ull; + in->inode.change_attr = 0; memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout)); if (in->inode.is_dir()) { From 6e7682b47e8d4d3be3dc3818d5bdd2b16bfc5a27 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 16/37] include/ceph_features: add FS_CHANGE_ATTR feature ...that is mapped to BTIME feature. Signed-off-by: Jeff Layton --- src/include/ceph_features.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 27f0890a89c74..197a7097c5faa 100755 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -85,6 +85,7 @@ #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ #define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ #define CEPH_FEATURE_FS_BTIME (1ULL<<59) /* btime */ +#define CEPH_FEATURE_FS_CHANGE_ATTR (1ULL<<59) /* change_attr */ #define CEPH_FEATURE_RESERVED2 (1ULL<<61) /* slow down, we are almost out... */ #define CEPH_FEATURE_RESERVED (1ULL<<62) /* DO NOT USE THIS ... last bit! 
*/ @@ -182,6 +183,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) { CEPH_FEATURE_FS_FILE_LAYOUT_V2 | \ CEPH_FEATURE_SERVER_KRAKEN | \ CEPH_FEATURE_FS_BTIME | \ + CEPH_FEATURE_FS_CHANGE_ATTR | \ 0ULL) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL From b07a707ad0ea31b1ef34ea949751d14c55bc1e8a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:39 -0400 Subject: [PATCH 17/37] mds: pass change_attr in InodeStat MClientReply, if feature is present Signed-off-by: Jeff Layton --- src/mds/CInode.cc | 1 + src/messages/MClientReply.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 11f50fd97d469..365f3ab812e85 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -3359,6 +3359,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, } if (session->connection->has_feature(CEPH_FEATURE_FS_BTIME)) { ::encode(any_i->btime, bl); + ::encode(any_i->change_attr, bl); } return valid; diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index a997448197533..c5e7d3912deb9 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -105,6 +105,7 @@ struct InodeStat { utime_t ctime, btime, mtime, atime; uint32_t time_warp_seq; uint64_t size, max_size; + uint64_t change_attr; uint64_t truncate_size; uint32_t truncate_seq; uint32_t mode, uid, gid, nlink; @@ -185,10 +186,13 @@ struct InodeStat { if ((features & CEPH_FEATURE_FS_FILE_LAYOUT_V2)) ::decode(layout.pool_ns, p); - if ((features & CEPH_FEATURE_FS_BTIME)) + if ((features & CEPH_FEATURE_FS_BTIME)) { ::decode(btime, p); - else + ::decode(change_attr, p); + } else { btime = utime_t(); + change_attr = 0; + } } // see CInode::encode_inodestat for encoder. 
From d3ff304a7aab6563b0e5d45585fdb1cd64309d0f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 18/37] mds/client: keep change_attr in Inode, CapSnap and MClientCaps ...and pass it around appropriately. Signed-off-by: Jeff Layton --- src/client/Client.cc | 24 +++++++++++++++--------- src/client/Client.h | 9 ++++----- src/client/Inode.cc | 1 + src/client/Inode.h | 12 +++++++----- src/mds/CInode.cc | 1 + src/mds/Locker.cc | 6 ++++++ src/messages/MClientCaps.h | 8 +++++++- 7 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index e0726aa7339d0..13c11a835be55 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -704,7 +704,7 @@ void Client::trim_dentry(Dentry *dn) void Client::update_inode_file_bits(Inode *in, uint64_t truncate_seq, uint64_t truncate_size, - uint64_t size, + uint64_t size, uint64_t change_attr, uint64_t time_warp_seq, utime_t ctime, utime_t mtime, utime_t atime, @@ -770,6 +770,8 @@ void Client::update_inode_file_bits(Inode *in, ldout(cct, 30) << "Yay have enough caps to look at our times" << dendl; if (ctime > in->ctime) in->ctime = ctime; + if (change_attr > in->change_attr) + in->change_attr = change_attr; if (time_warp_seq > in->time_warp_seq) { ldout(cct, 10) << "mds time_warp_seq " << time_warp_seq << " on inode " << *in << " is higher than local time_warp_seq " @@ -889,9 +891,9 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from, } update_inode_file_bits(in, st->truncate_seq, st->truncate_size, st->size, - st->time_warp_seq, st->ctime, st->mtime, st->atime, - st->inline_version, st->inline_data, - issued); + st->change_attr, st->time_warp_seq, st->ctime, + st->mtime, st->atime, st->inline_version, + st->inline_data, issued); } else if (st->inline_version > in->inline_version) { in->inline_data = st->inline_data; in->inline_version = st->inline_version; @@ -3237,6 +3239,7 @@ void Client::send_cap(Inode *in, MetaSession 
*session, Cap *cap, m->ctime = in->ctime; m->btime = in->btime; m->time_warp_seq = in->time_warp_seq; + m->change_attr = in->change_attr; if (flush & CEPH_CAP_FILE_WR) { m->inline_version = in->inline_version; @@ -3445,6 +3448,7 @@ void Client::finish_cap_snap(Inode *in, CapSnap *capsnap, int used) capsnap->atime = in->atime; capsnap->ctime = in->ctime; capsnap->time_warp_seq = in->time_warp_seq; + capsnap->change_attr = in->change_attr; capsnap->dirty |= in->caps_dirty(); @@ -3533,6 +3537,7 @@ void Client::flush_snaps(Inode *in, bool all_again) m->mtime = capsnap->mtime; m->atime = capsnap->atime; m->time_warp_seq = capsnap->time_warp_seq; + m->change_attr = capsnap->change_attr; if (capsnap->dirty & CEPH_CAP_FILE_WR) { m->inline_version = in->inline_version; @@ -4665,10 +4670,9 @@ void Client::handle_cap_trunc(MetaSession *session, Inode *in, MClientCaps *m) int issued = in->caps_issued(&implemented) | in->caps_dirty(); issued |= implemented; update_inode_file_bits(in, m->get_truncate_seq(), m->get_truncate_size(), - m->get_size(), m->get_time_warp_seq(), m->get_ctime(), - m->get_mtime(), m->get_atime(), - m->inline_version, m->inline_data, - issued); + m->get_size(), m->get_change_attr(), m->get_time_warp_seq(), + m->get_ctime(), m->get_mtime(), m->get_atime(), + m->inline_version, m->inline_data, issued); m->put(); } @@ -4890,7 +4894,8 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient in->xattr_version = m->head.xattr_version; } update_inode_file_bits(in, m->get_truncate_seq(), m->get_truncate_size(), m->get_size(), - m->get_time_warp_seq(), m->get_ctime(), m->get_mtime(), m->get_atime(), + m->get_change_attr(), m->get_time_warp_seq(), m->get_ctime(), + m->get_mtime(), m->get_atime(), m->inline_version, m->inline_data, issued); // max_size @@ -9623,6 +9628,7 @@ Inode *Client::open_snapdir(Inode *diri) in->ctime = diri->ctime; in->btime = diri->btime; in->size = diri->size; + in->change_attr = diri->change_attr; 
in->dirfragtree.clear(); in->snapdir_parent = diri; diff --git a/src/client/Client.h b/src/client/Client.h index 520f000a3e095..4413d4b459106 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -707,11 +707,10 @@ class Client : public Dispatcher, public md_config_obs_t { void clear_dir_complete_and_ordered(Inode *diri, bool complete); void insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri); Inode* insert_trace(MetaRequest *request, MetaSession *session); - void update_inode_file_bits(Inode *in, - uint64_t truncate_seq, uint64_t truncate_size, uint64_t size, - uint64_t time_warp_seq, utime_t ctime, utime_t mtime, utime_t atime, - version_t inline_version, bufferlist& inline_data, - int issued); + void update_inode_file_bits(Inode *in, uint64_t truncate_seq, uint64_t truncate_size, uint64_t size, + uint64_t change_attr, uint64_t time_warp_seq, utime_t ctime, + utime_t mtime, utime_t atime, version_t inline_version, + bufferlist& inline_data, int issued); Inode *add_update_inode(InodeStat *st, utime_t ttl, MetaSession *session); Dentry *insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease, Inode *in, utime_t from, MetaSession *session, diff --git a/src/client/Inode.cc b/src/client/Inode.cc index 08d6ab31e4440..0fd6c0fbab7a2 100644 --- a/src/client/Inode.cc +++ b/src/client/Inode.cc @@ -359,6 +359,7 @@ void Inode::dump(Formatter *f) const f->dump_stream("mtime") << mtime; f->dump_stream("atime") << atime; f->dump_int("time_warp_seq", time_warp_seq); + f->dump_int("change_attr", change_attr); f->dump_object("layout", layout); if (is_dir()) { diff --git a/src/client/Inode.h b/src/client/Inode.h index 28fc439a429f8..e15c48cff05d8 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -53,6 +53,7 @@ struct CapSnap { uint64_t size; utime_t ctime, btime, mtime, atime; version_t time_warp_seq; + uint64_t change_attr; uint32_t mode; uid_t uid; gid_t gid; @@ -66,9 +67,9 @@ struct CapSnap { uint64_t flush_tid; explicit 
CapSnap(Inode *i) - : in(i), issued(0), dirty(0), - size(0), time_warp_seq(0), mode(0), uid(0), gid(0), xattr_version(0), - inline_version(0), writing(false), dirty_data(false), flush_tid(0) + : in(i), issued(0), dirty(0), size(0), time_warp_seq(0), change_attr(0), + mode(0), uid(0), gid(0), xattr_version(0), inline_version(0), + writing(false), dirty_data(false), flush_tid(0) {} void dump(Formatter *f) const; @@ -111,6 +112,7 @@ struct Inode { utime_t mtime; // file data modify time. utime_t atime; // file data access time. uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) + uint64_t change_attr; uint64_t max_size; // max size we can write to @@ -227,8 +229,8 @@ struct Inode { : client(c), ino(vino.ino), snapid(vino.snapid), faked_ino(0), rdev(0), mode(0), uid(0), gid(0), nlink(0), size(0), truncate_seq(1), truncate_size(-1), - time_warp_seq(0), max_size(0), version(0), xattr_version(0), - inline_version(0), flags(0), + time_warp_seq(0), change_attr(0), max_size(0), version(0), + xattr_version(0), inline_version(0), flags(0), dir(0), dir_release_count(1), dir_ordered_count(1), dir_hashed(false), dir_replicated(false), auth_cap(NULL), cap_dirtier_uid(-1), cap_dirtier_gid(-1), diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 365f3ab812e85..d24aab64ced60 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -3392,6 +3392,7 @@ void CInode::encode_cap_message(MClientCaps *m, Capability *cap) m->mtime = i->mtime; m->atime = i->atime; m->ctime = i->ctime; + m->change_attr = i->change_attr; m->time_warp_seq = i->time_warp_seq; if (cap->client_inline_version < i->inline_data.version) { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 3ca5efae1f073..b65dd9f286dc4 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2978,6 +2978,12 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t * pi->ctime = m->get_ctime(); } + if (dirty && m->get_change_attr() > pi->change_attr) { + dout(7) << 
" change_attr " << pi->change_attr << " -> " << m->get_change_attr() + << " for " << *in << dendl; + pi->change_attr = m->get_change_attr(); + } + // file if (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { utime_t atime = m->get_atime(); diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h index a210e44284ff0..7153f7fbbc0f0 100644 --- a/src/messages/MClientCaps.h +++ b/src/messages/MClientCaps.h @@ -26,7 +26,7 @@ class MClientCaps : public Message { public: struct ceph_mds_caps_head head; - uint64_t size, max_size, truncate_size; + uint64_t size, max_size, truncate_size, change_attr; uint32_t truncate_seq; utime_t mtime, atime, ctime, btime; file_layout_t layout; @@ -65,6 +65,7 @@ class MClientCaps : public Message { utime_t get_btime() { return btime; } utime_t get_mtime() { return mtime; } utime_t get_atime() { return atime; } + __u64 get_change_attr() { return change_attr; } __u32 get_time_warp_seq() { return time_warp_seq; } const file_layout_t& get_layout() { @@ -111,6 +112,7 @@ class MClientCaps : public Message { size(0), max_size(0), truncate_size(0), + change_attr(0), truncate_seq(0), time_warp_seq(0), osd_epoch_barrier(0), @@ -132,6 +134,7 @@ class MClientCaps : public Message { size(0), max_size(0), truncate_size(0), + change_attr(0), truncate_seq(0), time_warp_seq(0), osd_epoch_barrier(oeb), @@ -157,6 +160,7 @@ class MClientCaps : public Message { size(0), max_size(0), truncate_size(0), + change_attr(0), truncate_seq(0), time_warp_seq(0), osd_epoch_barrier(oeb), @@ -258,6 +262,7 @@ class MClientCaps : public Message { } if (header.version >= 9) { ::decode(btime, p); + ::decode(change_attr, p); } } void encode_payload(uint64_t features) { @@ -316,6 +321,7 @@ class MClientCaps : public Message { ::encode(layout.pool_ns, payload); ::encode(btime, payload); + ::encode(change_attr, payload); } }; From 902566840bedba0b3115e704291a3f1131b1904f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 10:33:10 -0400 Subject: [PATCH 
19/37] mds: ensure that change_attr reflects metadata changes on clients that hold CAP_FILE_EXCL Suppose we have two clients. client1 holds FILE_EXCL cap and client2 holds AUTH_EXCL. Both have the change_attr at the same value (call it 1). client1 does 2 writes and its change_attr goes to 3. The client1 then queries for the change_attr and gets back 3 from the cache. The MDS then recalls FILE_EXCL from client1 and now the MDS and client1 have the same change_attr (3). client2 then does a chmod on the file, and its change_attr goes to 2. client1 then does a statx with STX_VERSION|STX_MODE. The MDS recalls the AUTH_EXCL cap from client2, the change_attr in the MClientCaps is less than the one in the MDS inode, so it gets discarded. client1 then sees a new mode but the change_attr value has not changed, which violates the rules. Fix this with an extra increment of the MDS copy of the change_attr when the caps being returned are dirty, and they don't contain exclusive write caps. Signed-off-by: Jeff Layton --- src/mds/Locker.cc | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index b65dd9f286dc4..6a262f34e979a 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2978,12 +2978,6 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t * pi->ctime = m->get_ctime(); } - if (dirty && m->get_change_attr() > pi->change_attr) { - dout(7) << " change_attr " << pi->change_attr << " -> " << m->get_change_attr() - << " for " << *in << dendl; - pi->change_attr = m->get_change_attr(); - } - // file if (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { utime_t atime = m->get_atime(); @@ -3053,6 +3047,24 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t * } } + /* + * Different clients can hold different exclusive caps on the same inode, + * but the change_attr field is under the aegis of the FILE cap. 
If the + * client isn't returning the FILE_EXCL cap, and the change_attr in the + * MClientCaps is not newer than the one in the inode, then we must bump it + * an extra time to ensure that any client that _does_ hold FILE_EXCL sees + * a new change_attr. + */ + if (dirty) { + if (m->get_change_attr() > pi->change_attr) { + dout(7) << " change_attr " << pi->change_attr << " -> " << m->get_change_attr() + << " for " << *in << dendl; + pi->change_attr = m->get_change_attr(); + } else if (!(dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { + pi->change_attr++; + } + } + } /* From b77605f9d0b4ab7de92523cefeefae91c971baea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 20/37] mds/client: bump the change_attr at the appropriate time for files The semantics for a change_attr are that it should be incremented whenever there is a change to the ctime in the inode. Add those increments for the simple case of regular files. Directories however can be fragmented so we'll need to do something more elaborate there. 
Signed-off-by: Jeff Layton --- src/client/Client.cc | 9 ++++++++- src/mds/Server.cc | 9 +++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 13c11a835be55..95abf60db9203 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6524,13 +6524,16 @@ int Client::_do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid mask &= ~(CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME); } } - if (!mask) + if (!mask) { + in->change_attr++; return 0; + } force_request: MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SETATTR); filepath path; + in->make_nosnap_relative_path(path); req->set_filepath(path); req->set_inode(in); @@ -8796,6 +8799,7 @@ int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, // mtime in->mtime = ceph_clock_now(cct); + in->change_attr++; mark_caps_dirty(in, CEPH_CAP_FILE_WR); done: @@ -11829,6 +11833,7 @@ int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length) in->inline_version++; } in->mtime = ceph_clock_now(cct); + in->change_attr++; mark_caps_dirty(in, CEPH_CAP_FILE_WR); } else { if (in->inline_version < CEPH_INLINE_NONE) { @@ -11856,6 +11861,7 @@ int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length) 0, true, onfinish, new C_OnFinisher(onsafe, &objecter_finisher)); in->mtime = ceph_clock_now(cct); + in->change_attr++; mark_caps_dirty(in, CEPH_CAP_FILE_WR); client_lock.Unlock(); @@ -11870,6 +11876,7 @@ int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length) if (size > in->size) { in->size = size; in->mtime = ceph_clock_now(cct); + in->change_attr++; mark_caps_dirty(in, CEPH_CAP_FILE_WR); if (is_quota_bytes_approaching(in)) { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index e9ddbf3a4431b..3ad1654b52493 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3835,6 +3835,7 @@ void Server::handle_client_setattr(MDRequestRef& mdr) pi->version = cur->pre_dirty(); pi->ctime = mdr->get_op_stamp(); + 
pi->change_attr++; // log + wait le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); @@ -3869,6 +3870,7 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) inode_t *pi = in->project_inode(); pi->version = in->pre_dirty(); pi->mtime = pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; uint64_t old_size = MAX(pi->size, mdr->client_request->head.args.open.old_size); if (old_size > 0) { @@ -3984,6 +3986,7 @@ void Server::handle_client_setlayout(MDRequestRef& mdr) pi->add_old_pool(old_pool); pi->version = cur->pre_dirty(); pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; // log + wait mdr->ls = mdlog->get_current_segment(); @@ -4503,6 +4506,7 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) inode_t *pi = cur->project_inode(px); pi->version = cur->pre_dirty(); pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; pi->xattr_version++; px->erase(name); if (!(flags & CEPH_XATTR_REMOVE)) { @@ -4564,6 +4568,7 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) inode_t *pi = cur->project_inode(px); pi->version = cur->pre_dirty(); pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; pi->xattr_version++; px->erase(name); @@ -4955,6 +4960,7 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti) inode_t *pi = targeti->project_inode(); pi->nlink++; pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; pi->version = tipv; // log + wait @@ -5604,6 +5610,7 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) mdr->add_projected_inode(in); // do this _after_ my dn->pre_dirty().. we apply that one manually. 
pi->version = in->pre_dirty(); pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; pi->nlink--; if (pi->nlink == 0) in->state_set(CInode::STATE_ORPHAN); @@ -6787,11 +6794,13 @@ void Server::_rename_prepare(MDRequestRef& mdr, if (!silent) { if (pi) { pi->ctime = mdr->get_op_stamp(); + pi->change_attr++; if (linkmerge) pi->nlink--; } if (tpi) { tpi->ctime = mdr->get_op_stamp(); + tpi->change_attr++; destdn->make_path_string(tpi->stray_prior_path); tpi->nlink--; if (tpi->nlink == 0) From 50605be79988c2512d0696f16225db85ccbb7362 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 21/37] libcephfs: return the change_attr in the statx.stx_version field Signed-off-by: Jeff Layton --- src/client/Client.cc | 6 ++++-- src/include/cephfs/ceph_statx.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 95abf60db9203..ddf38f9beff1a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6677,7 +6677,7 @@ unsigned Client::statx_to_mask(unsigned int flags, unsigned int want) mask |= CEPH_CAP_AUTH_SHARED; if (want & CEPH_STATX_NLINK) mask |= CEPH_CAP_LINK_SHARED; - if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS)) + if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION)) mask |= CEPH_CAP_FILE_SHARED; out: @@ -6836,6 +6836,7 @@ void Client::fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx) stx->stx_atime_ns = in->atime.nsec(); stx->stx_mtime = in->mtime.sec(); stx->stx_mtime_ns = in->mtime.nsec(); + stx->stx_version = in->change_attr; if (in->is_dir()) { if (cct->_conf->client_dirsize_rbytes) @@ -6847,7 +6848,8 @@ void Client::fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx) stx->stx_size = in->size; stx->stx_blocks = (in->size + 511) >> 9; } - stx->stx_mask |= 
(CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS); + stx->stx_mask |= (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME| + CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION); } } diff --git a/src/include/cephfs/ceph_statx.h b/src/include/cephfs/ceph_statx.h index 48ce0f9154a74..94870702d10dc 100644 --- a/src/include/cephfs/ceph_statx.h +++ b/src/include/cephfs/ceph_statx.h @@ -69,6 +69,7 @@ struct ceph_statx { #define CEPH_STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define CEPH_STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define CEPH_STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define CEPH_STATX_VERSION 0x00001000U /* Want/got stx_version */ #define CEPH_STATX_GEN 0x00002000U /* Want/got stx_gen */ #define CEPH_STATX_ALL_STATS 0x00003fffU /* All supported stats */ From 925e0e739fcb582efa5b72a9e951565aabda0966 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 22/37] tests: add a ChangeAttr test Signed-off-by: Jeff Layton --- src/test/libcephfs/test.cc | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index 16fde06889bb5..8e57a1d9749bc 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -1551,3 +1551,47 @@ TEST(LibCephFS, LazyStatx) { ceph_shutdown(cmount1); ceph_shutdown(cmount2); } + +TEST(LibCephFS, ChangeAttr) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(ceph_mount(cmount, "/"), 0); + + char filename[32]; + sprintf(filename, "/changeattr%x", getpid()); + + ceph_unlink(cmount, filename); + int fd = ceph_open(cmount, filename, O_RDWR|O_CREAT|O_EXCL, 0666); + ASSERT_LT(0, fd); + + struct ceph_statx stx; + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_VERSION, 0), 
0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + + uint64_t old_change_attr = stx.stx_version; + + /* do chmod, and check whether change_attr changed */ + ASSERT_EQ(ceph_chmod(cmount, filename, 0644), 0); + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_VERSION, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + ASSERT_NE(stx.stx_version, old_change_attr); + old_change_attr = stx.stx_version; + + /* now do a write and see if it changed again */ + ASSERT_EQ(3, ceph_write(cmount, fd, "foo", 3, 0)); + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_VERSION, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + ASSERT_NE(stx.stx_version, old_change_attr); + old_change_attr = stx.stx_version; + + /* Now truncate and check again */ + ASSERT_EQ(0, ceph_ftruncate(cmount, fd, 0)); + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_VERSION, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + ASSERT_NE(stx.stx_version, old_change_attr); + + ceph_close(cmount, fd); + ceph_shutdown(cmount); +} From 4d1eade0729f5935aba5f53064aa6566c3573772 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 23/37] mds: make frag_info_t add_delta() function take a pointer to touched_mtime ...rather than messing around with references. While we're at it, we can also make the argument optional, which allows us to drop an unused stack variable from CDir::split. 
Signed-off-by: Jeff Layton --- src/mds/CDir.cc | 9 ++++----- src/mds/CInode.cc | 2 +- src/mds/MDCache.cc | 2 +- src/mds/mdstypes.h | 5 +++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index b5ef08b64718a..e0aaebbab660c 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -941,10 +941,9 @@ void CDir::split(int bits, list& subs, list& wai frag_info_t fragstatdiff; if (fnode.accounted_rstat.version == rstat_version) rstatdiff.add_delta(fnode.accounted_rstat, fnode.rstat); - if (fnode.accounted_fragstat.version == dirstat_version) { - bool touched_mtime; - fragstatdiff.add_delta(fnode.accounted_fragstat, fnode.fragstat, touched_mtime); - } + if (fnode.accounted_fragstat.version == dirstat_version) + fragstatdiff.add_delta(fnode.accounted_fragstat, fnode.fragstat); + dout(10) << " rstatdiff " << rstatdiff << " fragstatdiff " << fragstatdiff << dendl; prepare_old_fragment(replay); @@ -1035,7 +1034,7 @@ void CDir::merge(list& subs, list& waiters, bool rstatdiff.add_delta(dir->fnode.accounted_rstat, dir->fnode.rstat); if (dir->fnode.accounted_fragstat.version == dirstat_version) fragstatdiff.add_delta(dir->fnode.accounted_fragstat, dir->fnode.fragstat, - touched_mtime); + &touched_mtime); dir->prepare_old_fragment(replay); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index d24aab64ced60..22a7be3c70e70 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -2013,7 +2013,7 @@ void CInode::finish_scatter_gather_update(int type) if (pf->accounted_fragstat.version == pi->dirstat.version - 1) { dout(20) << fg << " fragstat " << pf->fragstat << dendl; dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; - pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, touched_mtime); + pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime); } else { dout(20) << fg << " skipping STALE accounted_fragstat " << pf->accounted_fragstat << dendl; } diff --git a/src/mds/MDCache.cc 
b/src/mds/MDCache.cc index c95dacb8d5a0c..574e505a4ef56 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2278,7 +2278,7 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, dout(20) << "predirty_journal_parents add_delta " << pf->fragstat << dendl; dout(20) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl; bool touched_mtime = false; - pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, touched_mtime); + pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime); pf->accounted_fragstat = pf->fragstat; if (touched_mtime) pi->mtime = pi->ctime = pi->dirstat.mtime; diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index c7dd37b6b8fff..5c900300489bc 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -181,10 +181,11 @@ struct frag_info_t : public scatter_info_t { } // *this += cur - acc; - void add_delta(const frag_info_t &cur, frag_info_t &acc, bool& touched_mtime) { + void add_delta(const frag_info_t &cur, frag_info_t &acc, bool *touched_mtime=0) { if (cur.mtime > mtime) { mtime = cur.mtime; - touched_mtime = true; + if (touched_mtime) + *touched_mtime = true; } nfiles += cur.nfiles - acc.nfiles; nsubdirs += cur.nsubdirs - acc.nsubdirs; From 0d441dcd6af553d11d6be6df56d577c5659904a0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 24/37] mds: add change_attr to frag_info_t To give us change attribute support for directories. Whenever we gather the dirfrags, we just select the largest change_attr out of the set. 
Signed-off-by: Jeff Layton --- src/mds/CDir.cc | 5 ++--- src/mds/CInode.cc | 8 ++++++-- src/mds/MDCache.cc | 10 ++++++++-- src/mds/mdstypes.cc | 9 +++++++-- src/mds/mdstypes.h | 12 ++++++++++-- 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index e0aaebbab660c..7be10cc8de829 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -943,7 +943,6 @@ void CDir::split(int bits, list& subs, list& wai rstatdiff.add_delta(fnode.accounted_rstat, fnode.rstat); if (fnode.accounted_fragstat.version == dirstat_version) fragstatdiff.add_delta(fnode.accounted_fragstat, fnode.fragstat); - dout(10) << " rstatdiff " << rstatdiff << " fragstatdiff " << fragstatdiff << dendl; prepare_old_fragment(replay); @@ -1021,7 +1020,7 @@ void CDir::merge(list& subs, list& waiters, bool nest_info_t rstatdiff; frag_info_t fragstatdiff; - bool touched_mtime; + bool touched_mtime, touched_chattr; version_t rstat_version = inode->get_projected_inode()->rstat.version; version_t dirstat_version = inode->get_projected_inode()->dirstat.version; @@ -1034,7 +1033,7 @@ void CDir::merge(list& subs, list& waiters, bool rstatdiff.add_delta(dir->fnode.accounted_rstat, dir->fnode.rstat); if (dir->fnode.accounted_fragstat.version == dirstat_version) fragstatdiff.add_delta(dir->fnode.accounted_fragstat, dir->fnode.fragstat, - &touched_mtime); + &touched_mtime, &touched_chattr); dir->prepare_old_fragment(replay); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 22a7be3c70e70..9138a0e1622a5 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1988,7 +1988,7 @@ void CInode::finish_scatter_gather_update(int type) assert(is_auth()); inode_t *pi = get_projected_inode(); - bool touched_mtime = false; + bool touched_mtime = false, touched_chattr = false; dout(20) << " orig dirstat " << pi->dirstat << dendl; pi->dirstat.version++; for (compact_map::iterator p = dirfrags.begin(); @@ -2013,7 +2013,7 @@ void CInode::finish_scatter_gather_update(int type) if 
(pf->accounted_fragstat.version == pi->dirstat.version - 1) { dout(20) << fg << " fragstat " << pf->fragstat << dendl; dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; - pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime); + pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr); } else { dout(20) << fg << " skipping STALE accounted_fragstat " << pf->accounted_fragstat << dendl; } @@ -2041,6 +2041,8 @@ void CInode::finish_scatter_gather_update(int type) } if (touched_mtime) pi->mtime = pi->ctime = pi->dirstat.mtime; + if (touched_chattr) + pi->change_attr = pi->dirstat.change_attr; dout(20) << " final dirstat " << pi->dirstat << dendl; if (dirstat_valid && !dirstat.same_sums(pi->dirstat)) { @@ -2063,6 +2065,8 @@ void CInode::finish_scatter_gather_update(int type) version_t v = pi->dirstat.version; if (pi->dirstat.mtime > dirstat.mtime) dirstat.mtime = pi->dirstat.mtime; + if (pi->dirstat.change_attr > dirstat.change_attr) + dirstat.change_attr = pi->dirstat.change_attr; pi->dirstat = dirstat; pi->dirstat.version = v; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 574e505a4ef56..5953ed66963e6 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2153,6 +2153,8 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, if (do_parent_mtime) { pf->fragstat.mtime = mut->get_op_stamp(); + pf->fragstat.change_attr++; + dout(10) << "predirty_journal_parents bumping change_attr to " << pf->fragstat.change_attr << " on " << parent << dendl; if (pf->fragstat.mtime > pf->rstat.rctime) { dout(10) << "predirty_journal_parents updating mtime on " << *parent << dendl; pf->rstat.rctime = pf->fragstat.mtime; @@ -2277,11 +2279,13 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, if (do_parent_mtime || linkunlink) { dout(20) << "predirty_journal_parents add_delta " << pf->fragstat << dendl; dout(20) << "predirty_journal_parents - " 
<< pf->accounted_fragstat << dendl; - bool touched_mtime = false; - pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime); + bool touched_mtime = false, touched_chattr = false; + pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr); pf->accounted_fragstat = pf->fragstat; if (touched_mtime) pi->mtime = pi->ctime = pi->dirstat.mtime; + if (touched_chattr) + pi->change_attr = pi->dirstat.change_attr; dout(20) << "predirty_journal_parents gives " << pi->dirstat << " on " << *pin << dendl; if (parent->get_frag() == frag_t()) { // i.e., we are the only frag @@ -11997,6 +12001,8 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) if (!good_fragstat) { if (pf->fragstat.mtime > frag_info.mtime) frag_info.mtime = pf->fragstat.mtime; + if (pf->fragstat.change_attr > frag_info.change_attr) + frag_info.change_attr = pf->fragstat.change_attr; pf->fragstat = frag_info; mds->locker->mark_updated_scatterlock(&diri->filelock); mdr->ls->dirty_dirfrag_dir.push_back(&diri->item_dirty_dirfrag_dir); diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 20aa36fa156f8..0edb545d3364b 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -14,21 +14,26 @@ const mds_rank_t MDS_RANK_NONE = mds_rank_t(-1); void frag_info_t::encode(bufferlist &bl) const { - ENCODE_START(2, 2, bl); + ENCODE_START(3, 2, bl); ::encode(version, bl); ::encode(mtime, bl); ::encode(nfiles, bl); ::encode(nsubdirs, bl); + ::encode(change_attr, bl); ENCODE_FINISH(bl); } void frag_info_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); ::decode(version, bl); ::decode(mtime, bl); ::decode(nfiles, bl); ::decode(nsubdirs, bl); + if (struct_v >= 3) + ::decode(change_attr, bl); + else + change_attr = 0; DECODE_FINISH(bl); } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 5c900300489bc..8c249169a1943 100644 --- a/src/mds/mdstypes.h +++ 
b/src/mds/mdstypes.h @@ -169,10 +169,11 @@ struct scatter_info_t { struct frag_info_t : public scatter_info_t { // this frag utime_t mtime; + uint64_t change_attr; int64_t nfiles; // files int64_t nsubdirs; // subdirs - frag_info_t() : nfiles(0), nsubdirs(0) {} + frag_info_t() : change_attr(0), nfiles(0), nsubdirs(0) {} int64_t size() const { return nfiles + nsubdirs; } @@ -181,12 +182,17 @@ struct frag_info_t : public scatter_info_t { } // *this += cur - acc; - void add_delta(const frag_info_t &cur, frag_info_t &acc, bool *touched_mtime=0) { + void add_delta(const frag_info_t &cur, frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { if (cur.mtime > mtime) { mtime = cur.mtime; if (touched_mtime) *touched_mtime = true; } + if (cur.change_attr > change_attr) { + change_attr = cur.change_attr; + if (touched_chattr) + *touched_chattr = true; + } nfiles += cur.nfiles - acc.nfiles; nsubdirs += cur.nsubdirs - acc.nsubdirs; } @@ -194,6 +200,8 @@ struct frag_info_t : public scatter_info_t { void add(const frag_info_t& other) { if (other.mtime > mtime) mtime = other.mtime; + if (other.change_attr > change_attr) + change_attr = other.change_attr; nfiles += other.nfiles; nsubdirs += other.nsubdirs; } From 806991e85e93502d36514a28762ddf2261bf629a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 25/37] test: add test for change attribute of directories Signed-off-by: Jeff Layton --- src/test/libcephfs/test.cc | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index 8e57a1d9749bc..ac34f3f0d19b4 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -1595,3 +1595,40 @@ TEST(LibCephFS, ChangeAttr) { ceph_close(cmount, fd); ceph_shutdown(cmount); } + +TEST(LibCephFS, DirChangeAttr) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 
0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(ceph_mount(cmount, "/"), 0); + + char dirname[32], filename[32]; + sprintf(dirname, "/dirchange%x", getpid()); + sprintf(filename, "%s/foo", dirname); + + ASSERT_EQ(ceph_mkdir(cmount, dirname, 0755), 0); + + struct ceph_statx stx; + ASSERT_EQ(ceph_statx(cmount, dirname, &stx, CEPH_STATX_VERSION, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + + uint64_t old_change_attr = stx.stx_version; + + int fd = ceph_open(cmount, filename, O_RDWR|O_CREAT|O_EXCL, 0666); + ASSERT_LT(0, fd); + ceph_close(cmount, fd); + + ASSERT_EQ(ceph_statx(cmount, dirname, &stx, CEPH_STATX_VERSION, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + ASSERT_NE(stx.stx_version, old_change_attr); + + old_change_attr = stx.stx_version; + + ASSERT_EQ(ceph_unlink(cmount, filename), 0); + ASSERT_EQ(ceph_statx(cmount, dirname, &stx, CEPH_STATX_VERSION, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_VERSION); + ASSERT_NE(stx.stx_version, old_change_attr); + + ceph_shutdown(cmount); +} From 3148b67bd77732ce66fc7f0f9c80c44f25c08466 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:40 -0400 Subject: [PATCH 26/37] cephfs: rename ceph_mds_request_head and _args with a _legacy postfix We're going to need to introduce new versions of these structures in order to expand the setattr union member. Rename the existing ones so that it's clear that they are for legacy clients and servers. 
Signed-off-by: Jeff Layton --- src/client/Client.cc | 2 +- src/client/MetaRequest.h | 4 ++-- src/include/ceph_fs.h | 6 +++--- src/include/types.h | 2 +- src/messages/MClientRequest.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index ddf38f9beff1a..433f1b0168a97 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2146,7 +2146,7 @@ MClientRequest* Client::build_client_request(MetaRequest *request) MClientRequest *req = new MClientRequest(request->get_op()); req->set_tid(request->tid); req->set_stamp(request->op_stamp); - memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head)); + memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head_legacy)); // if the filepath's haven't been set, set them! if (request->path.empty()) { diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h index 1776bdefbe47f..2640c5afc8c36 100644 --- a/src/client/MetaRequest.h +++ b/src/client/MetaRequest.h @@ -27,7 +27,7 @@ struct MetaRequest { public: uint64_t tid; utime_t op_stamp; - ceph_mds_request_head head; + ceph_mds_request_head_legacy head; filepath path, path2; bufferlist data; int inode_drop; //the inode caps this operation will drop @@ -86,7 +86,7 @@ struct MetaRequest { got_unsafe(false), item(this), unsafe_item(this), unsafe_dir_item(this), unsafe_target_item(this), caller_cond(0), dispatch_cond(0) { - memset(&head, 0, sizeof(ceph_mds_request_head)); + memset(&head, 0, sizeof(ceph_mds_request_head_legacy)); head.op = op; } ~MetaRequest(); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 1610aaf17cc2c..b4f2ae6e4146c 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -399,7 +399,7 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_READDIR_FRAG_COMPLETE (1<<8) #define CEPH_READDIR_HASH_ORDER (1<<9) -union ceph_mds_request_args { +union ceph_mds_request_args_legacy { struct { __le32 mask; /* CEPH_CAP_* */ } __attribute__ ((packed)) 
getattr; @@ -456,7 +456,7 @@ union ceph_mds_request_args { #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ -struct ceph_mds_request_head { +struct ceph_mds_request_head_legacy { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -466,7 +466,7 @@ struct ceph_mds_request_head { __le32 caller_uid, caller_gid; __le64 ino; /* use this ino for openc, mkdir, mknod, etc. (if replaying) */ - union ceph_mds_request_args args; + union ceph_mds_request_args_legacy args; } __attribute__ ((packed)); /* cap/lease release record */ diff --git a/src/include/types.h b/src/include/types.h index 7735654bf9273..545006c61aef5 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -244,7 +244,7 @@ WRITE_RAW_ENCODER(ceph_fsid) WRITE_RAW_ENCODER(ceph_file_layout) WRITE_RAW_ENCODER(ceph_dir_layout) WRITE_RAW_ENCODER(ceph_mds_session_head) -WRITE_RAW_ENCODER(ceph_mds_request_head) +WRITE_RAW_ENCODER(ceph_mds_request_head_legacy) WRITE_RAW_ENCODER(ceph_mds_request_release) WRITE_RAW_ENCODER(ceph_filelock) WRITE_RAW_ENCODER(ceph_mds_caps_head) diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 1c37459105760..6d0c27ec6c167 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -50,7 +50,7 @@ class MClientRequest : public Message { static const int COMPAT_VERSION = 1; public: - struct ceph_mds_request_head head; + struct ceph_mds_request_head_legacy head; utime_t stamp; struct Release { From 2bdc8727f6179d4c940a7ddd5dba92665405e612 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:41 -0400 Subject: [PATCH 27/37] MDS: allow the MDS to accept requests to set the btime Unfortunately, the only option here is to rev the MClientRequest version as the ceph_mds_request_head is not currently versioned. Add a new ceph_mds_request_head, which contains a new ceph_mds_request_args structure. 
The new ceph_mds_request_head is now versioned via a __le16 at the beginning of it, and then the args structure is expanded to hold the btime. When we get a legacy ceph_mds_request_head, we just set the new fields to zero. When encoding a reply to a legacy client, we simply don't encode the version in the head, or the btime in the setattr union member. Reluctantly-Signed-off-by: Jeff Layton --- src/client/Client.cc | 2 +- src/client/MetaRequest.h | 2 +- src/include/ceph_fs.h | 81 +++++++++++++++++++++++++++++++++++ src/include/types.h | 1 + src/mds/Server.cc | 6 ++- src/messages/MClientRequest.h | 40 +++++++++++++++-- 6 files changed, 124 insertions(+), 8 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 433f1b0168a97..ddf38f9beff1a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2146,7 +2146,7 @@ MClientRequest* Client::build_client_request(MetaRequest *request) MClientRequest *req = new MClientRequest(request->get_op()); req->set_tid(request->tid); req->set_stamp(request->op_stamp); - memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head_legacy)); + memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head)); // if the filepath's haven't been set, set them! 
if (request->path.empty()) { diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h index 2640c5afc8c36..8eb02048da0b2 100644 --- a/src/client/MetaRequest.h +++ b/src/client/MetaRequest.h @@ -86,7 +86,7 @@ struct MetaRequest { got_unsafe(false), item(this), unsafe_item(this), unsafe_dir_item(this), unsafe_target_item(this), caller_cond(0), dispatch_cond(0) { - memset(&head, 0, sizeof(ceph_mds_request_head_legacy)); + memset(&head, 0, sizeof(head)); head.op = op; } ~MetaRequest(); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index b4f2ae6e4146c..73701a6ba6d9d 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -376,6 +376,7 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_ATIME (1 << 4) #define CEPH_SETATTR_SIZE (1 << 5) #define CEPH_SETATTR_CTIME (1 << 6) +#define CEPH_SETATTR_BTIME (1 << 9) #endif #define CEPH_SETATTR_MTIME_NOW (1 << 7) #define CEPH_SETATTR_ATIME_NOW (1 << 8) @@ -399,6 +400,7 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_READDIR_FRAG_COMPLETE (1<<8) #define CEPH_READDIR_HASH_ORDER (1<<9) +/* Note that this is embedded within ceph_mds_request_head_legacy. */ union ceph_mds_request_args_legacy { struct { __le32 mask; /* CEPH_CAP_* */ @@ -469,6 +471,85 @@ struct ceph_mds_request_head_legacy { union ceph_mds_request_args_legacy args; } __attribute__ ((packed)); +/* + * Note that this is embedded within ceph_mds_request_head. Also, compatibility + * with the ceph_mds_request_args_legacy must be maintained! 
+ */ +union ceph_mds_request_args { + struct { + __le32 mask; /* CEPH_CAP_* */ + } __attribute__ ((packed)) getattr; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr; + struct { + __le32 frag; /* which dir fragment */ + __le32 max_entries; /* how many dentries to grab */ + __le32 max_bytes; + __le16 flags; + } __attribute__ ((packed)) readdir; + struct { + __le32 mode; + __le32 rdev; + } __attribute__ ((packed)) mknod; + struct { + __le32 mode; + } __attribute__ ((packed)) mkdir; + struct { + __le32 flags; + __le32 mode; + __le32 stripe_unit; /* layout for newly created file */ + __le32 stripe_count; /* ... */ + __le32 object_size; + __le32 pool; /* if >= 0 and CREATEPOOLID feature */ + __le32 mask; /* CEPH_CAP_* */ + __le64 old_size; /* if O_TRUNC */ + } __attribute__ ((packed)) open; + struct { + __le32 flags; + __le32 osdmap_epoch; /* use for set file/dir layout */ + } __attribute__ ((packed)) setxattr; + struct { + struct ceph_file_layout layout; + } __attribute__ ((packed)) setlayout; + struct { + __u8 rule; /* currently fcntl or flock */ + __u8 type; /* shared, exclusive, remove*/ + __le64 owner; /* who requests/holds the lock */ + __le64 pid; /* process id requesting the lock */ + __le64 start; /* initial location to lock */ + __le64 length; /* num bytes to lock from start */ + __u8 wait; /* will caller wait for lock to become available? */ + } __attribute__ ((packed)) filelock_change; +} __attribute__ ((packed)); + +#define CEPH_MDS_REQUEST_HEAD_VERSION 1 + +/* + * Note that any change to this structure must ensure that it is compatible + * with ceph_mds_request_head_legacy. 
+ */ +struct ceph_mds_request_head { + __le16 version; + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* count retry, fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args args; +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ diff --git a/src/include/types.h b/src/include/types.h index 545006c61aef5..3593f5af292b0 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -245,6 +245,7 @@ WRITE_RAW_ENCODER(ceph_file_layout) WRITE_RAW_ENCODER(ceph_dir_layout) WRITE_RAW_ENCODER(ceph_mds_session_head) WRITE_RAW_ENCODER(ceph_mds_request_head_legacy) +WRITE_RAW_ENCODER(ceph_mds_request_head) WRITE_RAW_ENCODER(ceph_mds_request_release) WRITE_RAW_ENCODER(ceph_filelock) WRITE_RAW_ENCODER(ceph_mds_caps_head) diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 3ad1654b52493..9a6bc92c05d63 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3746,7 +3746,7 @@ void Server::handle_client_setattr(MDRequestRef& mdr) __u32 access_mask = MAY_WRITE; // xlock inode - if (mask & (CEPH_SETATTR_MODE|CEPH_SETATTR_UID|CEPH_SETATTR_GID)) + if (mask & (CEPH_SETATTR_MODE|CEPH_SETATTR_UID|CEPH_SETATTR_GID|CEPH_SETATTR_BTIME)) xlocks.insert(&cur->authlock); if (mask & (CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME|CEPH_SETATTR_SIZE)) xlocks.insert(&cur->filelock); @@ -3811,7 +3811,9 @@ void Server::handle_client_setattr(MDRequestRef& mdr) pi->mtime = req->head.args.setattr.mtime; if (mask & CEPH_SETATTR_ATIME) pi->atime = req->head.args.setattr.atime; - if (mask & (CEPH_SETATTR_ATIME | CEPH_SETATTR_MTIME)) + if (mask & CEPH_SETATTR_BTIME) + pi->btime = req->head.args.setattr.btime; + if (mask & 
(CEPH_SETATTR_ATIME | CEPH_SETATTR_MTIME | CEPH_SETATTR_BTIME)) pi->time_warp_seq++; // maybe not a timewarp, but still a serialization point. if (mask & CEPH_SETATTR_SIZE) { if (truncating_smaller) { diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 6d0c27ec6c167..26f69f7f3ec72 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -36,6 +36,7 @@ #include "msg/Message.h" #include "include/filepath.h" #include "mds/mdstypes.h" +#include "include/ceph_features.h" #include #include @@ -46,11 +47,11 @@ // metadata ops. class MClientRequest : public Message { - static const int HEAD_VERSION = 3; + static const int HEAD_VERSION = 4; static const int COMPAT_VERSION = 1; public: - struct ceph_mds_request_head_legacy head; + struct ceph_mds_request_head head; utime_t stamp; struct Release { @@ -160,7 +161,28 @@ class MClientRequest : public Message { void decode_payload() { bufferlist::iterator p = payload.begin(); - ::decode(head, p); + + if (header.version >= 4) { + ::decode(head, p); + } else { + struct ceph_mds_request_head_legacy *old_mds_head = + (struct ceph_mds_request_head_legacy *)&(head.oldest_client_tid); + + ::decode(*old_mds_head, p); + + head.version = 0; + + /* Can't set the btime from legacy struct */ + if (head.op == CEPH_MDS_OP_SETATTR) { + int localmask = head.args.setattr.mask; + + localmask &= ~CEPH_SETATTR_BTIME; + + head.args.setattr.btime = { 0 }; + head.args.setattr.mask = localmask; + } + } + ::decode(path, p); ::decode(path2, p); ::decode_nohead(head.num_releases, releases, p); @@ -170,7 +192,17 @@ class MClientRequest : public Message { void encode_payload(uint64_t features) { head.num_releases = releases.size(); - ::encode(head, payload); + head.version = CEPH_MDS_REQUEST_HEAD_VERSION; + + if (features & CEPH_FEATURE_FS_BTIME) { + ::encode(head, payload); + } else { + struct ceph_mds_request_head_legacy *old_mds_head = + (struct ceph_mds_request_head_legacy *)&(head.oldest_client_tid); 
+ + ::encode(*old_mds_head, payload); + } + ::encode(path, payload); ::encode(path2, payload); ::encode_nohead(releases, payload); From bce221c1b99a564d2f4827d2f28cbe939184d308 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:41 -0400 Subject: [PATCH 28/37] client: add the ability to set the btime This adds a new set of libcephfs calls: ceph_ll_setattrx and ceph_setattrx. This allows clients to set the btime in addition to other values that are typically settable via ceph_setattr calls. Currently, the setattrx mask uses the same CEPH_SETATTR values that the ceph_setattr interface uses. I'm not sure this is what we will want though. Would it be better to rephrase that via STATX_* constants? Signed-off-by: Jeff Layton --- src/client/Client.cc | 186 +++++++++++++++++++++++---------- src/client/Client.h | 13 ++- src/client/MetaRequest.h | 2 +- src/include/cephfs/libcephfs.h | 31 ++++-- src/libcephfs.cc | 15 +++ src/test/libcephfs/test.cc | 31 ++++++ 6 files changed, 210 insertions(+), 68 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index ddf38f9beff1a..16b4ffaf33f1a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -5064,7 +5064,7 @@ int Client::xattr_permission(Inode *in, const char *name, unsigned want, int uid return r; } -int Client::may_setattr(Inode *in, struct stat *st, int mask, int uid, int gid) +int Client::may_setattr(Inode *in, struct ceph_statx *stx, int mask, int uid, int gid) { if (uid < 0) uid = get_uid(); @@ -5084,12 +5084,12 @@ int Client::may_setattr(Inode *in, struct stat *st, int mask, int uid, int gid) r = -EPERM; if (mask & CEPH_SETATTR_UID) { - if (uid != 0 && ((uid_t)uid != in->uid || st->st_uid != in->uid)) + if (uid != 0 && ((uid_t)uid != in->uid || stx->stx_uid != in->uid)) goto out; } if (mask & CEPH_SETATTR_GID) { if (uid != 0 && ((uid_t)uid != in->uid || - (!groups.is_in(st->st_gid) && st->st_gid != in->gid))) + (!groups.is_in(stx->stx_gid) && stx->stx_gid != in->gid))) goto 
out; } @@ -5097,14 +5097,14 @@ int Client::may_setattr(Inode *in, struct stat *st, int mask, int uid, int gid) if (uid != 0 && (uid_t)uid != in->uid) goto out; - gid_t i_gid = (mask & CEPH_SETATTR_GID) ? st->st_gid : in->gid; + gid_t i_gid = (mask & CEPH_SETATTR_GID) ? stx->stx_gid : in->gid; if (uid != 0 && !groups.is_in(i_gid)) - st->st_mode &= ~S_ISGID; + stx->stx_mode &= ~S_ISGID; } - if (mask & (CEPH_SETATTR_CTIME | CEPH_SETATTR_MTIME | CEPH_SETATTR_ATIME)) { + if (mask & (CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME | CEPH_SETATTR_MTIME | CEPH_SETATTR_ATIME)) { if (uid != 0 && (uid_t)uid != in->uid) { - int check_mask = CEPH_SETATTR_CTIME; + int check_mask = CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME; if (!(mask & CEPH_SETATTR_MTIME_NOW)) check_mask |= CEPH_SETATTR_MTIME; if (!(mask & CEPH_SETATTR_ATIME_NOW)) @@ -6421,7 +6421,7 @@ int Client::_getattr(Inode *in, int mask, int uid, int gid, bool force) return res; } -int Client::_do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid, +int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask, int uid, int gid, InodeRef *inp) { int issued = in->caps_issued(); @@ -6433,8 +6433,8 @@ int Client::_do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid return -EROFS; } if ((mask & CEPH_SETATTR_SIZE) && - (unsigned long)attr->st_size > in->size && - is_quota_bytes_exceeded(in, (unsigned long)attr->st_size - in->size)) { + (unsigned long)stx->stx_size > in->size && + is_quota_bytes_exceeded(in, (unsigned long)stx->stx_size - in->size)) { return -EDQUOT; } @@ -6486,36 +6486,45 @@ int Client::_do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid in->ctime = ceph_clock_now(cct); in->cap_dirtier_uid = uid; in->cap_dirtier_gid = gid; - in->mode = (in->mode & ~07777) | (attr->st_mode & 07777); + in->mode = (in->mode & ~07777) | (stx->stx_mode & 07777); mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); mask &= ~CEPH_SETATTR_MODE; - ldout(cct,10) << "changing mode to " << attr->st_mode << 
dendl; + ldout(cct,10) << "changing mode to " << stx->stx_mode << dendl; } if (mask & CEPH_SETATTR_UID) { in->ctime = ceph_clock_now(cct); in->cap_dirtier_uid = uid; in->cap_dirtier_gid = gid; - in->uid = attr->st_uid; + in->uid = stx->stx_uid; mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); mask &= ~CEPH_SETATTR_UID; - ldout(cct,10) << "changing uid to " << attr->st_uid << dendl; + ldout(cct,10) << "changing uid to " << stx->stx_uid << dendl; } if (mask & CEPH_SETATTR_GID) { in->ctime = ceph_clock_now(cct); in->cap_dirtier_uid = uid; in->cap_dirtier_gid = gid; - in->gid = attr->st_gid; + in->gid = stx->stx_gid; mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); mask &= ~CEPH_SETATTR_GID; - ldout(cct,10) << "changing gid to " << attr->st_gid << dendl; + ldout(cct,10) << "changing gid to " << stx->stx_gid << dendl; + } + if (mask & CEPH_SETATTR_BTIME) { + in->ctime = ceph_clock_now(cct); + in->cap_dirtier_uid = uid; + in->cap_dirtier_gid = gid; + in->btime = utime_t(stx->stx_btime, stx->stx_btime_ns); + mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); + mask &= ~CEPH_SETATTR_BTIME; + ldout(cct,10) << "changing btime to " << in->btime << dendl; } } if (in->caps_issued_mask(CEPH_CAP_FILE_EXCL)) { if (mask & (CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME)) { if (mask & CEPH_SETATTR_MTIME) - in->mtime = utime_t(stat_get_mtime_sec(attr), stat_get_mtime_nsec(attr)); + in->mtime = utime_t(stx->stx_mtime, stx->stx_mtime_ns); if (mask & CEPH_SETATTR_ATIME) - in->atime = utime_t(stat_get_atime_sec(attr), stat_get_atime_nsec(attr)); + in->atime = utime_t(stx->stx_atime, stx->stx_atime_ns); in->ctime = ceph_clock_now(cct); in->cap_dirtier_uid = uid; in->cap_dirtier_gid = gid; @@ -6539,35 +6548,38 @@ int Client::_do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid req->set_inode(in); if (mask & CEPH_SETATTR_MODE) { - req->head.args.setattr.mode = attr->st_mode; + req->head.args.setattr.mode = stx->stx_mode; req->inode_drop |= CEPH_CAP_AUTH_SHARED; - ldout(cct,10) << "changing mode to " << 
attr->st_mode << dendl; + ldout(cct,10) << "changing mode to " << stx->stx_mode << dendl; } if (mask & CEPH_SETATTR_UID) { - req->head.args.setattr.uid = attr->st_uid; + req->head.args.setattr.uid = stx->stx_uid; req->inode_drop |= CEPH_CAP_AUTH_SHARED; - ldout(cct,10) << "changing uid to " << attr->st_uid << dendl; + ldout(cct,10) << "changing uid to " << stx->stx_uid << dendl; } if (mask & CEPH_SETATTR_GID) { - req->head.args.setattr.gid = attr->st_gid; + req->head.args.setattr.gid = stx->stx_gid; req->inode_drop |= CEPH_CAP_AUTH_SHARED; - ldout(cct,10) << "changing gid to " << attr->st_gid << dendl; + ldout(cct,10) << "changing gid to " << stx->stx_gid << dendl; } if (mask & CEPH_SETATTR_MTIME) { - utime_t mtime = utime_t(stat_get_mtime_sec(attr), stat_get_mtime_nsec(attr)); - req->head.args.setattr.mtime = mtime; + req->head.args.setattr.mtime = utime_t(stx->stx_mtime, stx->stx_mtime_ns); req->inode_drop |= CEPH_CAP_AUTH_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } if (mask & CEPH_SETATTR_ATIME) { - utime_t atime = utime_t(stat_get_atime_sec(attr), stat_get_atime_nsec(attr)); - req->head.args.setattr.atime = atime; + req->head.args.setattr.atime = utime_t(stx->stx_atime, stx->stx_atime_ns); + req->inode_drop |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | + CEPH_CAP_FILE_WR; + } + if (mask & CEPH_SETATTR_BTIME) { + req->head.args.setattr.btime = utime_t(stx->stx_btime, stx->stx_btime_ns); req->inode_drop |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } if (mask & CEPH_SETATTR_SIZE) { - if ((unsigned long)attr->st_size < mdsmap->get_max_filesize()) - req->head.args.setattr.size = attr->st_size; + if ((unsigned long)stx->stx_size < mdsmap->get_max_filesize()) + req->head.args.setattr.size = stx->stx_size; else { //too big! 
put_request(req); return -EFBIG; @@ -6584,29 +6596,51 @@ int Client::_do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid return res; } -int Client::_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid, +/* Note that we only care about attrs that setattr cares about */ +void Client::stat_to_statx(struct stat *st, struct ceph_statx *stx) +{ + stx->stx_size = st->st_size; + stx->stx_mode = st->st_mode; + stx->stx_uid = st->st_uid; + stx->stx_gid = st->st_gid; + stx->stx_mtime = stat_get_mtime_sec(st); + stx->stx_mtime_ns = stat_get_mtime_nsec(st); + stx->stx_atime = stat_get_atime_sec(st); + stx->stx_atime_ns = stat_get_atime_nsec(st); +} + +int Client::__setattrx(Inode *in, struct ceph_statx *stx, int mask, int uid, int gid, InodeRef *inp) { - int ret = _do_setattr(in, attr, mask, uid, gid, inp); + int ret = _do_setattr(in, stx, mask, uid, gid, inp); if (ret < 0) return ret; if (mask & CEPH_SETATTR_MODE) - ret = _posix_acl_chmod(in, attr->st_mode, uid, gid); + ret = _posix_acl_chmod(in, stx->stx_mode, uid, gid); return ret; } -int Client::_setattr(InodeRef &in, struct stat *attr, int mask) +int Client::_setattrx(InodeRef &in, struct ceph_statx *stx, int mask) { mask &= (CEPH_SETATTR_MODE | CEPH_SETATTR_UID | CEPH_SETATTR_GID | CEPH_SETATTR_MTIME | CEPH_SETATTR_ATIME | CEPH_SETATTR_SIZE | - CEPH_SETATTR_CTIME); + CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME); if (cct->_conf->client_permissions) { - int r = may_setattr(in.get(), attr, mask); + int r = may_setattr(in.get(), stx, mask); if (r < 0) return r; } - return _setattr(in.get(), attr, mask); + return __setattrx(in.get(), stx, mask); +} + +int Client::_setattr(InodeRef &in, struct stat *attr, int mask) +{ + struct ceph_statx stx; + + stat_to_statx(attr, &stx); + mask &= ~CEPH_SETATTR_BTIME; + return _setattrx(in, &stx, mask); } int Client::setattr(const char *relpath, struct stat *attr, int mask) @@ -6624,6 +6658,21 @@ int Client::setattr(const char *relpath, struct stat *attr, int mask) 
return _setattr(in, attr, mask); } +int Client::setattrx(const char *relpath, struct ceph_statx *stx, int mask, int flags) +{ + Mutex::Locker lock(client_lock); + tout(cct) << "setattrx" << std::endl; + tout(cct) << relpath << std::endl; + tout(cct) << mask << std::endl; + + filepath path(relpath); + InodeRef in; + int r = path_walk(path, &in, flags & AT_SYMLINK_NOFOLLOW); + if (r < 0) + return r; + return _setattrx(in, stx, mask); +} + int Client::fsetattr(int fd, struct stat *attr, int mask) { Mutex::Locker lock(client_lock); @@ -8844,9 +8893,9 @@ int Client::_flush(Fh *f) int Client::truncate(const char *relpath, loff_t length) { - struct stat attr; - attr.st_size = length; - return setattr(relpath, &attr, CEPH_SETATTR_SIZE); + struct ceph_statx stx; + stx.stx_size = length; + return setattrx(relpath, &stx, CEPH_SETATTR_SIZE); } int Client::ftruncate(int fd, loff_t length) @@ -9859,41 +9908,66 @@ int Client::ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want, return res; } -int Client::ll_setattr(Inode *in, struct stat *attr, int mask, int uid, - int gid) +int Client::_ll_setattrx(Inode *in, struct ceph_statx *stx, int mask, int uid, + int gid, InodeRef *inp) { Mutex::Locker lock(client_lock); vinodeno_t vino = _get_vino(in); - ldout(cct, 3) << "ll_setattr " << vino << " mask " << hex << mask << dec + ldout(cct, 3) << "ll_setattrx " << vino << " mask " << hex << mask << dec << dendl; - tout(cct) << "ll_setattr" << std::endl; + tout(cct) << "ll_setattrx" << std::endl; tout(cct) << vino.ino.val << std::endl; - tout(cct) << attr->st_mode << std::endl; - tout(cct) << attr->st_uid << std::endl; - tout(cct) << attr->st_gid << std::endl; - tout(cct) << attr->st_size << std::endl; - tout(cct) << attr->st_mtime << std::endl; - tout(cct) << attr->st_atime << std::endl; + tout(cct) << stx->stx_mode << std::endl; + tout(cct) << stx->stx_uid << std::endl; + tout(cct) << stx->stx_gid << std::endl; + tout(cct) << stx->stx_size << std::endl; + tout(cct) << 
stx->stx_mtime << std::endl; + tout(cct) << stx->stx_atime << std::endl; + tout(cct) << stx->stx_btime << std::endl; tout(cct) << mask << std::endl; if (!cct->_conf->fuse_default_permissions) { - int res = may_setattr(in, attr, mask, uid, gid); + int res = may_setattr(in, stx, mask, uid, gid); if (res < 0) return res; } mask &= ~(CEPH_SETATTR_MTIME_NOW | CEPH_SETATTR_ATIME_NOW); + return __setattrx(in, stx, mask, uid, gid, inp); +} + +int Client::ll_setattrx(Inode *in, struct ceph_statx *stx, int mask, int uid, + int gid) +{ + InodeRef target(in); + int res = _ll_setattrx(in, stx, mask, uid, gid, &target); + if (res == 0) { + assert(in == target.get()); + fill_statx(in, in->caps_issued(), stx); + } + + ldout(cct, 3) << "ll_setattrx " << _get_vino(in) << " = " << res << dendl; + return res; +} + +int Client::ll_setattr(Inode *in, struct stat *attr, int mask, int uid, + int gid) +{ + struct ceph_statx stx; + + stat_to_statx(attr, &stx); + InodeRef target(in); - int res = _setattr(in, attr, mask, uid, gid, &target); + int res = _ll_setattrx(in, &stx, mask, uid, gid, &target); if (res == 0) { assert(in == target.get()); fill_stat(in, attr); } - ldout(cct, 3) << "ll_setattr " << vino << " = " << res << dendl; + ldout(cct, 3) << "ll_setattr " << _get_vino(in) << " = " << res << dendl; return res; } @@ -10217,9 +10291,9 @@ int Client::_setxattr(Inode *in, const char *name, const void *value, size = 0; } if (new_mode != in->mode) { - struct stat attr; - attr.st_mode = new_mode; - ret = _do_setattr(in, &attr, CEPH_SETATTR_MODE, uid, gid, NULL); + struct ceph_statx stx; + stx.stx_mode = new_mode; + ret = _do_setattr(in, &stx, CEPH_SETATTR_MODE, uid, gid, NULL); if (ret < 0) return ret; } diff --git a/src/client/Client.h b/src/client/Client.h index 4413d4b459106..e5c655c4fbdea 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -773,9 +773,13 @@ class Client : public Dispatcher, public md_config_obs_t { int _rmdir(Inode *dir, const char *name, int uid=-1, int 
gid=-1); int _symlink(Inode *dir, const char *name, const char *target, int uid=-1, int gid=-1, InodeRef *inp = 0); int _mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev, int uid=-1, int gid=-1, InodeRef *inp = 0); - int _do_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid, InodeRef *inp); - int _setattr(Inode *in, struct stat *attr, int mask, int uid=-1, int gid=-1, InodeRef *inp = 0); + int _do_setattr(Inode *in, struct ceph_statx *stx, int mask, int uid, int gid, InodeRef *inp); + void stat_to_statx(struct stat *st, struct ceph_statx *stx); + int __setattrx(Inode *in, struct ceph_statx *stx, int mask, int uid=-1, int gid=-1, InodeRef *inp = 0); + int _setattrx(InodeRef &in, struct ceph_statx *stx, int mask); int _setattr(InodeRef &in, struct stat *attr, int mask); + int _ll_setattrx(Inode *in, struct ceph_statx *stx, int mask, int uid = -1, + int gid = -1, InodeRef *inp = 0); int _getattr(Inode *in, int mask, int uid=-1, int gid=-1, bool force=false); int _getattr(InodeRef &in, int mask, int uid=-1, int gid=-1, bool force=false) { return _getattr(in.get(), mask, uid, gid, force); @@ -845,7 +849,7 @@ class Client : public Dispatcher, public md_config_obs_t { int inode_permission(Inode *in, uid_t uid, UserGroups& groups, unsigned want); int xattr_permission(Inode *in, const char *name, unsigned want, int uid=-1, int gid=-1); - int may_setattr(Inode *in, struct stat *st, int mask, int uid=-1, int gid=-1); + int may_setattr(Inode *in, struct ceph_statx *stx, int mask, int uid=-1, int gid=-1); int may_open(Inode *in, int flags, int uid=-1, int gid=-1); int may_lookup(Inode *dir, int uid=-1, int gid=-1); int may_create(Inode *dir, int uid=-1, int gid=-1); @@ -999,6 +1003,7 @@ class Client : public Dispatcher, public md_config_obs_t { int lstatlite(const char *path, struct statlite *buf); int setattr(const char *relpath, struct stat *attr, int mask); + int setattrx(const char *relpath, struct ceph_statx *stx, int mask, int flags=0); int 
fsetattr(int fd, struct stat *attr, int mask); int chmod(const char *path, mode_t mode); int fchmod(int fd, mode_t mode); @@ -1096,6 +1101,8 @@ class Client : public Dispatcher, public md_config_obs_t { int ll_getattr(Inode *in, struct stat *st, int uid = -1, int gid = -1); int ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want, unsigned int flags, int uid = -1, int gid = -1); + int ll_setattrx(Inode *in, struct ceph_statx *stx, int mask, int uid = -1, + int gid = -1); int ll_setattr(Inode *in, struct stat *st, int mask, int uid = -1, int gid = -1); int ll_getxattr(Inode *in, const char *name, void *value, size_t size, diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h index 8eb02048da0b2..341533ebc1e12 100644 --- a/src/client/MetaRequest.h +++ b/src/client/MetaRequest.h @@ -27,7 +27,7 @@ struct MetaRequest { public: uint64_t tid; utime_t op_stamp; - ceph_mds_request_head_legacy head; + ceph_mds_request_head head; filepath path, path2; bufferlist data; int inode_drop; //the inode caps this operation will drop diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h index c550bcd7b02d2..688702130b621 100644 --- a/src/include/cephfs/libcephfs.h +++ b/src/include/cephfs/libcephfs.h @@ -106,13 +106,14 @@ struct CephContext; /* setattr mask bits */ #ifndef CEPH_SETATTR_MODE -# define CEPH_SETATTR_MODE 1 -# define CEPH_SETATTR_UID 2 -# define CEPH_SETATTR_GID 4 -# define CEPH_SETATTR_MTIME 8 -# define CEPH_SETATTR_ATIME 16 -# define CEPH_SETATTR_SIZE 32 -# define CEPH_SETATTR_CTIME 64 +# define CEPH_SETATTR_MODE 1 +# define CEPH_SETATTR_UID 2 +# define CEPH_SETATTR_GID 4 +# define CEPH_SETATTR_MTIME 8 +# define CEPH_SETATTR_ATIME 16 +# define CEPH_SETATTR_SIZE 32 +# define CEPH_SETATTR_CTIME 64 +# define CEPH_SETATTR_BTIME 512 #endif /* define error codes for the mount function*/ @@ -649,11 +650,23 @@ int ceph_lstat(struct ceph_mount_info *cmount, const char *path, struct stat *st * @param cmount the ceph mount handle to 
use for performing the setattr. * @param relpath the path to the file/directory to set the attributes of. * @param attr the stat struct that must include attribute values to set on the file. - * @param mask a mask of all the stat values that have been set on the stat struct. + * @param mask a mask of all the CEPH_SETATTR_* values that have been set in the stat struct. * @returns 0 on success or negative error code on failure. */ int ceph_setattr(struct ceph_mount_info *cmount, const char *relpath, struct stat *attr, int mask); +/** + * Set a file's attributes (extended version). + * + * @param cmount the ceph mount handle to use for performing the setattr. + * @param relpath the path to the file/directory to set the attributes of. + * @param stx the statx struct that must include attribute values to set on the file. + * @param mask a mask of all the CEPH_SETATTR_* values that have been set in the statx struct. + * @param flags mask of AT_* flags (only AT_SYMLINK_NOFOLLOW is respected for now) + * @returns 0 on success or negative error code on failure. + */ +int ceph_setattrx(struct ceph_mount_info *cmount, const char *relpath, struct ceph_statx *stx, int mask, int flags); + /** * Change the mode bits (permissions) of a file/directory. 
* @@ -1413,6 +1426,8 @@ int ceph_ll_getattrx(struct ceph_mount_info *cmount, struct Inode *in, int uid, int gid); int ceph_ll_setattr(struct ceph_mount_info *cmount, struct Inode *in, struct stat *st, int mask, int uid, int gid); +int ceph_ll_setattrx(struct ceph_mount_info *cmount, struct Inode *in, + struct ceph_statx *stx, int mask, int uid, int gid); int ceph_ll_open(struct ceph_mount_info *cmount, struct Inode *in, int flags, struct Fh **fh, int uid, int gid); off_t ceph_ll_lseek(struct ceph_mount_info *cmount, struct Fh* filehandle, diff --git a/src/libcephfs.cc b/src/libcephfs.cc index c01255e59063b..05ddf8e82b47f 100644 --- a/src/libcephfs.cc +++ b/src/libcephfs.cc @@ -634,6 +634,14 @@ extern "C" int ceph_setattr(struct ceph_mount_info *cmount, const char *relpath, return cmount->get_client()->setattr(relpath, attr, mask); } +extern "C" int ceph_setattrx(struct ceph_mount_info *cmount, const char *relpath, + struct ceph_statx *stx, int mask, int flags) +{ + if (!cmount->is_mounted()) + return -ENOTCONN; + return cmount->get_client()->setattrx(relpath, stx, mask, flags); +} + // *xattr() calls supporting samba/vfs extern "C" int ceph_getxattr(struct ceph_mount_info *cmount, const char *path, const char *name, void *value, size_t size) { @@ -1429,6 +1437,13 @@ extern "C" int ceph_ll_setattr(class ceph_mount_info *cmount, return (cmount->get_client()->ll_setattr(in, st, mask, uid, gid)); } +extern "C" int ceph_ll_setattrx(class ceph_mount_info *cmount, + Inode *in, struct ceph_statx *stx, + int mask, int uid, int gid) +{ + return (cmount->get_client()->ll_setattrx(in, stx, mask, uid, gid)); +} + extern "C" int ceph_ll_open(class ceph_mount_info *cmount, Inode *in, int flags, Fh **fh, int uid, int gid) { diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index ac34f3f0d19b4..a4062a95e9b8d 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -1504,6 +1504,37 @@ TEST(LibCephFS, Btime) { ceph_shutdown(cmount); } 
+TEST(LibCephFS, SetBtime) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(ceph_mount(cmount, "/"), 0); + + char filename[32]; + sprintf(filename, "/setbtime%x", getpid()); + + ceph_unlink(cmount, filename); + int fd = ceph_open(cmount, filename, O_RDWR|O_CREAT|O_EXCL, 0666); + ASSERT_LT(0, fd); + ceph_close(cmount, fd); + + struct ceph_statx stx; + + stx.stx_btime = 1; + stx.stx_btime_ns = 2; + + ASSERT_EQ(ceph_setattrx(cmount, filename, &stx, CEPH_SETATTR_BTIME, 0), 0); + + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_BTIME, 0), 0); + ASSERT_TRUE(stx.stx_mask & CEPH_STATX_BTIME); + + ASSERT_EQ(stx.stx_btime, 1); + ASSERT_EQ(stx.stx_btime_ns, 2); + + ceph_shutdown(cmount); +} + TEST(LibCephFS, LazyStatx) { struct ceph_mount_info *cmount1, *cmount2; ASSERT_EQ(ceph_create(&cmount1, NULL), 0); From aa48835ac0b57f4fa0cfcd4a0e726817816a4f9e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:41 -0400 Subject: [PATCH 29/37] client: add a ceph_fstatx Signed-off-by: Jeff Layton --- src/client/Client.cc | 25 +++++++++++++++++++++++++ src/client/Client.h | 1 + src/include/cephfs/libcephfs.h | 13 +++++++++++++ src/libcephfs.cc | 8 ++++++++ 4 files changed, 47 insertions(+) diff --git a/src/client/Client.cc b/src/client/Client.cc index 16b4ffaf33f1a..9c15f17a3bfeb 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -9030,6 +9030,31 @@ int Client::fstat(int fd, struct stat *stbuf, int mask) return r; } +int Client::fstatx(int fd, struct ceph_statx *stx, unsigned int want, unsigned int flags) +{ + Mutex::Locker lock(client_lock); + tout(cct) << "fstatx flags " << hex << flags << " want " << want << dec << std::endl; + tout(cct) << fd << std::endl; + + Fh *f = get_filehandle(fd); + if (!f) + return -EBADF; + + unsigned mask = statx_to_mask(flags, want); + + int r = 0; + if (mask && 
!f->inode->caps_issued_mask(mask)) { + r = _getattr(f->inode, mask); + if (r < 0) { + ldout(cct, 3) << "fstatx exit on error!" << dendl; + return r; + } + } + + fill_statx(f->inode, mask, stx); + ldout(cct, 3) << "fstatx(" << fd << ", " << stx << ") = " << r << dendl; + return r; +} // not written yet, but i want to link! diff --git a/src/client/Client.h b/src/client/Client.h index e5c655c4fbdea..8b674464a900c 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1034,6 +1034,7 @@ class Client : public Dispatcher, public md_config_obs_t { int ftruncate(int fd, loff_t size); int fsync(int fd, bool syncdataonly); int fstat(int fd, struct stat *stbuf, int mask=CEPH_STAT_CAP_INODE_ALL); + int fstatx(int fd, struct ceph_statx *stx, unsigned int want, unsigned int flags); int fallocate(int fd, int mode, loff_t offset, loff_t length); // full path xattr ops diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h index 688702130b621..bf38b2b8f2203 100644 --- a/src/include/cephfs/libcephfs.h +++ b/src/include/cephfs/libcephfs.h @@ -928,6 +928,19 @@ int ceph_fallocate(struct ceph_mount_info *cmount, int fd, int mode, */ int ceph_fstat(struct ceph_mount_info *cmount, int fd, struct stat *stbuf); +/** + * Get an open file's extended statistics and attributes. + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param fd the file descriptor of the file to get statistics of. + * @param stx the ceph_statx struct that will be filled in with the file's statistics. + * @param want bitfield of CEPH_STATX_* flags showing desired attributes + * @param flags bitfield that can be used to set AT_* modifier flags (only AT_NO_ATTR_SYNC and AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. 
+ */ +int ceph_fstatx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx, + unsigned int want, unsigned int flags); + /** @} file */ /** diff --git a/src/libcephfs.cc b/src/libcephfs.cc index 05ddf8e82b47f..9e65bff95d298 100644 --- a/src/libcephfs.cc +++ b/src/libcephfs.cc @@ -890,6 +890,14 @@ extern "C" int ceph_fstat(struct ceph_mount_info *cmount, int fd, struct stat *s return cmount->get_client()->fstat(fd, stbuf); } +extern "C" int ceph_fstatx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx, + unsigned int want, unsigned int flags) +{ + if (!cmount->is_mounted()) + return -ENOTCONN; + return cmount->get_client()->fstatx(fd, stx, want, flags); +} + extern "C" int ceph_sync_fs(struct ceph_mount_info *cmount) { if (!cmount->is_mounted()) From c3efea6fc34a437c7e711305a6e49908496768b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Aug 2016 07:16:41 -0400 Subject: [PATCH 30/37] client: extend the Btime test to cover fstatx as well Signed-off-by: Jeff Layton --- src/test/libcephfs/test.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index a4062a95e9b8d..4efb3ff7f9465 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -1483,7 +1483,15 @@ TEST(LibCephFS, Btime) { int fd = ceph_open(cmount, filename, O_RDWR|O_CREAT|O_EXCL, 0666); ASSERT_LT(0, fd); + /* make sure fstatx works */ struct ceph_statx stx; + + ASSERT_EQ(ceph_fstatx(cmount, fd, &stx, CEPH_STATX_CTIME|CEPH_STATX_BTIME, 0), 0); + ASSERT_TRUE(stx.stx_mask & (CEPH_STATX_CTIME|CEPH_STATX_BTIME)); + ASSERT_EQ(stx.stx_btime, stx.stx_ctime); + ASSERT_EQ(stx.stx_btime_ns, stx.stx_ctime_ns); + ceph_close(cmount, fd); + ASSERT_EQ(ceph_statx(cmount, filename, &stx, CEPH_STATX_CTIME|CEPH_STATX_BTIME, 0), 0); ASSERT_TRUE(stx.stx_mask & (CEPH_STATX_CTIME|CEPH_STATX_BTIME)); ASSERT_EQ(stx.stx_btime, stx.stx_ctime); From 41041d9e93c5867f612614e35fbbd0108399defb Mon Sep 17 00:00:00 2001 From: Jeff 
Layton Date: Thu, 1 Sep 2016 14:35:59 -0400 Subject: [PATCH 31/37] SQUASH: client: request AUTH caps on the directory during create if we'll need to check perms locally Signed-off-by: Jeff Layton --- src/client/Client.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 9c15f17a3bfeb..fb2f1800e402d 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -7761,7 +7761,9 @@ int Client::open(const char *relpath, int flags, mode_t mode, int stripe_unit, string dname = dirpath.last_dentry(); dirpath.pop_dentry(); InodeRef dir; - r = path_walk(dirpath, &dir, true, 0, uid, gid); + r = path_walk(dirpath, &dir, true, + cct->_conf->client_permissions ? CEPH_CAP_AUTH_SHARED : 0, + uid, gid); if (r < 0) goto out; if (cct->_conf->client_permissions) { From a9d6478a905dcab3c782325f1663c65ff8e79da3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Sep 2016 11:16:05 -0400 Subject: [PATCH 32/37] SQUASH: Revert "mds: ensure that change_attr reflects metadata changes on clients that hold CAP_FILE_EXCL" This reverts commit 26ab2fa10041ce34b25aa1535715131a2709a27f. I don't think this is necessary. We don't necessarily need to increment the change attribute on every change, as long as we ensure that it gets bumped iff there were changes since the last time you queried for it. IOW: it's ok to have a single change_attr change encompass a large set of changes, as long as you ensure that it is larger by at least 1 after all of those changes. In order to look at (and potentially cache) other attributes under that change_attribute (e.g. owner or mode), you need to recall any outstanding exclusive caps for those attrs. That causes their change_attrs to be synched to the largest, which is enough to ensure that it changed in some way. 
Signed-off-by: Jeff Layton --- src/mds/Locker.cc | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 6a262f34e979a..b65dd9f286dc4 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2978,6 +2978,12 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t * pi->ctime = m->get_ctime(); } + if (dirty && m->get_change_attr() > pi->change_attr) { + dout(7) << " change_attr " << pi->change_attr << " -> " << m->get_change_attr() + << " for " << *in << dendl; + pi->change_attr = m->get_change_attr(); + } + // file if (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { utime_t atime = m->get_atime(); @@ -3047,24 +3053,6 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t * } } - /* - * Different clients can hold different exclusive caps on the same inode, - * but the change_attr field is under the aegis of the FILE cap. If the - * client isn't returning the FILE_EXCL cap, and the change_attr in the - * MClientCaps is not newer than the one in the inode, then we must bump it - * an extra time to ensure that any client that _does_ hold FILE_EXCL sees - * a new change_attr. - */ - if (dirty) { - if (m->get_change_attr() > pi->change_attr) { - dout(7) << " change_attr " << pi->change_attr << " -> " << m->get_change_attr() - << " for " << *in << dendl; - pi->change_attr = m->get_change_attr(); - } else if (!(dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { - pi->change_attr++; - } - } - } /* From 0ac0551ee2dfad81dfca8fd9e91081f4e4b17e10 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Sep 2016 11:16:06 -0400 Subject: [PATCH 33/37] SQUASH: client: always take a change_attr update from the server if it's newer I think this is more correct in the client. If we see a newer change_attr, then we always want to take note of that fact, even if we have the right caps. 
Signed-off-by: Jeff Layton --- src/client/Client.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index fb2f1800e402d..c664dcd636bfc 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -725,6 +725,10 @@ void Client::update_inode_file_bits(Inode *in, in->inline_version = inline_version; } + /* always take a newer change attr */ + if (change_attr > in->change_attr) + in->change_attr = change_attr; + if (truncate_seq > in->truncate_seq || (truncate_seq == in->truncate_seq && size > in->size)) { ldout(cct, 10) << "size " << in->size << " -> " << size << dendl; @@ -770,8 +774,6 @@ void Client::update_inode_file_bits(Inode *in, ldout(cct, 30) << "Yay have enough caps to look at our times" << dendl; if (ctime > in->ctime) in->ctime = ctime; - if (change_attr > in->change_attr) - in->change_attr = change_attr; if (time_warp_seq > in->time_warp_seq) { ldout(cct, 10) << "mds time_warp_seq " << time_warp_seq << " on inode " << *in << " is higher than local time_warp_seq " From 44b102f9e2e2acb854899903557068482afdf0fa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Sep 2016 14:18:55 -0400 Subject: [PATCH 34/37] SQUASH: mds/client: don't get clever with ceph_mds_request_head_legacy encode/decode As requested by Greg... Declare a legacy object on the stack, and do an extra copy to/from it. 
Signed-off-by: Jeff Layton --- src/messages/MClientRequest.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 26f69f7f3ec72..00419ebb5aca4 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -165,11 +165,10 @@ class MClientRequest : public Message { if (header.version >= 4) { ::decode(head, p); } else { - struct ceph_mds_request_head_legacy *old_mds_head = - (struct ceph_mds_request_head_legacy *)&(head.oldest_client_tid); - - ::decode(*old_mds_head, p); + struct ceph_mds_request_head_legacy old_mds_head; + ::decode(old_mds_head, p); + memcpy(&head.oldest_client_tid, &old_mds_head, sizeof(old_mds_head)); head.version = 0; /* Can't set the btime from legacy struct */ @@ -197,10 +196,10 @@ class MClientRequest : public Message { if (features & CEPH_FEATURE_FS_BTIME) { ::encode(head, payload); } else { - struct ceph_mds_request_head_legacy *old_mds_head = - (struct ceph_mds_request_head_legacy *)&(head.oldest_client_tid); + struct ceph_mds_request_head_legacy old_mds_head; - ::encode(*old_mds_head, payload); + memcpy(&old_mds_head, &(head.oldest_client_tid), sizeof(old_mds_head)); + ::encode(old_mds_head, payload); } ::encode(path, payload); From 821e77cf1da59b9dcf3517101dc8b923e5358960 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Sep 2016 18:49:46 -0400 Subject: [PATCH 35/37] SQUASH: client: querying for stx_version field requires getting all shared caps Otherwise, someone could potentially query for just CEPH_STATX_VERSION, and see it as unchanged, even when there are changes buffered up on other clients. By doing this, I don't think we'll incur any perf hit in the common use case which is ganesha querying for all attributes. We are adding Xs here, but unless there is a lot of xattr activity I don't think that will generally cause a lot of exclusive cap recalls. 
Also, we don't actually need AUTH caps to fetch CEPH_STATX_RDEV, remove that from statx_to_mask. PIN is sufficient there. Signed-off-by: Jeff Layton --- src/client/Client.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index c664dcd636bfc..39e6760f5c28e 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6724,13 +6724,14 @@ unsigned Client::statx_to_mask(unsigned int flags, unsigned int want) /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */ mask |= CEPH_CAP_PIN; - if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_RDEV|CEPH_STATX_BTIME)) + if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME|CEPH_STATX_VERSION)) mask |= CEPH_CAP_AUTH_SHARED; - if (want & CEPH_STATX_NLINK) + if (want & CEPH_STATX_NLINK|CEPH_STATX_VERSION) mask |= CEPH_CAP_LINK_SHARED; if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION)) mask |= CEPH_CAP_FILE_SHARED; - + if (want & CEPH_STATX_VERSION) + mask |= CEPH_CAP_XATTR_SHARED; out: return mask; } From 7c59b9e96a37ecf8130efec29ec6443d7793473a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 2 Sep 2016 08:36:59 -0400 Subject: [PATCH 36/37] SQUASH: mds/client: add routines to copy to/from ceph_mds_request_head_legacy Signed-off-by: Jeff Layton --- src/include/ceph_fs.h | 14 ++++++++++++++ src/messages/MClientRequest.h | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 73701a6ba6d9d..946a10d4ba526 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -559,6 +559,20 @@ struct ceph_mds_request_release { __le32 dname_len; /* string follows. 
*/ } __attribute__ ((packed)); +static inline void +copy_from_legacy_head(struct ceph_mds_request_head *head, + struct ceph_mds_request_head_legacy *legacy) +{ + memcpy(&(head->oldest_client_tid), legacy, sizeof(*legacy)); +} + +static inline void +copy_to_legacy_head(struct ceph_mds_request_head_legacy *legacy, + struct ceph_mds_request_head *head) +{ + memcpy(legacy, &(head->oldest_client_tid), sizeof(*legacy)); +} + /* client reply */ struct ceph_mds_reply_head { __le32 op; diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 00419ebb5aca4..543761ec473d0 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -168,7 +168,7 @@ class MClientRequest : public Message { struct ceph_mds_request_head_legacy old_mds_head; ::decode(old_mds_head, p); - memcpy(&head.oldest_client_tid, &old_mds_head, sizeof(old_mds_head)); + copy_from_legacy_head(&head, &old_mds_head); head.version = 0; /* Can't set the btime from legacy struct */ @@ -198,7 +198,7 @@ class MClientRequest : public Message { } else { struct ceph_mds_request_head_legacy old_mds_head; - memcpy(&old_mds_head, &(head.oldest_client_tid), sizeof(old_mds_head)); + copy_to_legacy_head(&old_mds_head, &head); ::encode(old_mds_head, payload); } From 8e75bc144f4f57d0bfbaabe389061b2d81a486f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 2 Sep 2016 14:27:08 -0400 Subject: [PATCH 37/37] SQUASH: client: request all shared caps if ctime was requested Much like the change_attr, ctime changes can potentially be cached on clients. Request all shared caps if the want mask specifies the ctime. 
Signed-off-by: Jeff Layton --- src/client/Client.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 39e6760f5c28e..d27dec188966f 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6724,13 +6724,13 @@ unsigned Client::statx_to_mask(unsigned int flags, unsigned int want) /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */ mask |= CEPH_CAP_PIN; - if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME|CEPH_STATX_VERSION)) + if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME|CEPH_STATX_CTIME|CEPH_STATX_VERSION)) mask |= CEPH_CAP_AUTH_SHARED; - if (want & CEPH_STATX_NLINK|CEPH_STATX_VERSION) + if (want & (CEPH_STATX_NLINK|CEPH_STATX_CTIME|CEPH_STATX_VERSION)) mask |= CEPH_CAP_LINK_SHARED; if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION)) mask |= CEPH_CAP_FILE_SHARED; - if (want & CEPH_STATX_VERSION) + if (want & (CEPH_STATX_VERSION|CEPH_STATX_CTIME)) mask |= CEPH_CAP_XATTR_SHARED; out: return mask;