Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cephfs: support WORM(Write Once Read Many) feature #26691

Closed
wants to merge 8 commits into from
351 changes: 346 additions & 5 deletions src/client/Client.cc

Large diffs are not rendered by default.

21 changes: 19 additions & 2 deletions src/client/Client.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ enum {
l_c_last,
};


class MDSCommandOp : public CommandOp
{
public:
Expand Down Expand Up @@ -661,6 +660,7 @@ class Client : public Dispatcher, public md_config_obs_t {
vector<snapid_t>& snaps);

void handle_quota(const MConstRef<MClientQuota>& m);
void handle_worm(const MConstRef<MClientWorm>& m);
void handle_snap(const MConstRef<MClientSnap>& m);
void handle_caps(const MConstRef<MClientCaps>& m);
void handle_cap_import(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
Expand Down Expand Up @@ -739,6 +739,12 @@ class Client : public Dispatcher, public md_config_obs_t {
bool use_faked_inos() { return _use_faked_inos; }
vinodeno_t map_faked_ino(ino_t ino);

int ll_set_flags(Fh *fh, int worm_flag, const UserPerm& perms);
int ll_set_flags(Inode *in, int worm_flag, const UserPerm& perms);

int ll_get_flags(Fh *fh, const UserPerm& perms);
int ll_get_flags(Inode *in, const UserPerm& perms);

//notify the mds to flush the mdlog
void flush_mdlog_sync();
void flush_mdlog(MetaSession *session);
Expand Down Expand Up @@ -1093,7 +1099,7 @@ class Client : public Dispatcher, public md_config_obs_t {
const UserPerm& perms);
int _listxattr(Inode *in, char *names, size_t len, const UserPerm& perms);
int _do_setxattr(Inode *in, const char *name, const void *value, size_t len,
int flags, const UserPerm& perms);
int flags, int worm_flags, const UserPerm& perms);
int _setxattr(Inode *in, const char *name, const void *value, size_t len,
int flags, const UserPerm& perms);
int _setxattr(InodeRef &in, const char *name, const void *value, size_t len,
Expand Down Expand Up @@ -1163,6 +1169,14 @@ class Client : public Dispatcher, public md_config_obs_t {
size_t _vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size);
size_t _vxattrcb_dir_rbytes(Inode *in, char *val, size_t size);
size_t _vxattrcb_dir_rctime(Inode *in, char *val, size_t size);
bool _vxattrcb_worm_exists(Inode *in);
size_t _vxattrcb_worm(Inode *in, char *val, size_t size);
size_t _vxattrcb_worm_state(Inode *in, char *val, size_t size);
size_t _vxattrcb_worm_retention_period(Inode *in, char *val, size_t size);
size_t _vxattrcb_worm_auto_commit_period(Inode *in, char *val, size_t size);
size_t _vxattrcb_worm_max_retention_period(Inode *in, char *val, size_t size);
size_t _vxattrcb_worm_min_retention_period(Inode *in, char *val, size_t size);
size_t _vxattrcb_worm_exp_time(Inode *in, char *val, size_t size);

bool _vxattrcb_dir_pin_exists(Inode *in);
size_t _vxattrcb_dir_pin(Inode *in, char *val, size_t size);
Expand Down Expand Up @@ -1193,6 +1207,9 @@ class Client : public Dispatcher, public md_config_obs_t {
int _lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
bool _ll_forget(Inode *in, uint64_t count);

int worm_state_transition(Inode *in, const UserPerm& perms, int op);
int set_worm_exptime(Inode *in, const char *name, const void *value,
size_t size, int flags, const UserPerm& perms);

uint32_t deleg_timeout = 0;

Expand Down
3 changes: 3 additions & 0 deletions src/client/Inode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ ostream& operator<<(ostream &out, const Inode &in)
if (in.quota.is_enable())
out << " " << in.quota;

if (in.worm.is_enable())
out << " " << in.worm;

out << ' ' << &in << ")";
return out;
}
Expand Down
2 changes: 2 additions & 0 deletions src/client/Inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ struct Inode {
unsigned flags;

quota_info_t quota;

worm_info_t worm;

bool is_complete_and_ordered() {
static const unsigned wants = I_COMPLETE | I_DIR_ORDERED;
Expand Down
38 changes: 38 additions & 0 deletions src/client/fuse_ll.cc
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,44 @@ static void fuse_ll_ioctl(fuse_req_t req, fuse_ino_t ino, int cmd, void *arg, st
fuse_reply_ioctl(req, 0, &l, sizeof(struct ceph_ioctl_layout));
}
break;
#if defined(__linux__)
case FS_IOC_GETFLAGS:{
int attr = 0;
Fh *fh = (Fh*)fi->fh;
const struct fuse_ctx *ctx = fuse_req_ctx(req);
UserPerm perms(ctx->uid, ctx->gid);
get_fuse_groups(perms, req);
int ret = cfuse->client->ll_get_flags(fh, perms);
if (!ret) {
attr |= FS_IMMUTABLE_FL;
}

fuse_reply_ioctl(req, 0, &attr, sizeof(attr));
}
break;
case FS_IOC_SETFLAGS: {
auto flags = (int *)in_buf;

if (*flags & (~FS_IMMUTABLE_FL)) {
fuse_reply_err(req, EOPNOTSUPP);
return;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little concerned with this. Suppose I accidentally set this on the wrong file. How do I fix it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the only option may be to wait until the file's protection time expires. Any suggestions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think waiting will be acceptable -- we will need to be able to fix this in some fashion, as we can't just throw away the filesystem and start over (as you would with a CD-R or something).

Maybe this could only be allowed from certain clients using a new cephfs client capability flag, and those clients would be able to undo this as well?

http://docs.ceph.com/docs/master/cephfs/client-auth/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK! I have already added a new cap flag "i", and "chattr -i" to expire a file that is in the retained state.


int worm_flag = *flags & FS_IMMUTABLE_FL;
Fh *fh = (Fh*)fi->fh;
const struct fuse_ctx *ctx = fuse_req_ctx(req);
UserPerm perms(ctx->uid, ctx->gid);
get_fuse_groups(perms, req);
int ret = cfuse->client->ll_set_flags(fh, worm_flag, perms);
if (ret) {
fuse_reply_err(req, -ret);
return;
}

fuse_reply_ioctl(req, 0, NULL, 0);
}
break;
#endif
default:
fuse_reply_err(req, EINVAL);
}
Expand Down
1 change: 1 addition & 0 deletions src/client/ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#if defined(__linux__)
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#elif defined(__APPLE__) || defined(__FreeBSD__)
#include <sys/ioctl.h>
#include <sys/types.h>
Expand Down
24 changes: 24 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ constexpr unsigned long long operator"" _hr (unsigned long long hr) {
// User-defined literal suffix converting a number of days to seconds,
// e.g. 30_day == 2592000.
constexpr unsigned long long operator"" _day (unsigned long long day) {
  constexpr unsigned long long seconds_per_day = 24ULL * 60 * 60;
  return day * seconds_per_day;
}

// User-defined literal suffix converting a number of years to seconds.
// A year is counted as exactly 365 days (leap days are not considered),
// e.g. 1_year == 31536000.
constexpr unsigned long long operator"" _year (unsigned long long year) {
  constexpr unsigned long long seconds_per_year = 365ULL * 24 * 60 * 60;
  return year * seconds_per_year;
}

// User-defined literal suffix scaling n by 2^10, e.g. 4_K == 4096.
constexpr unsigned long long operator"" _K (unsigned long long n) {
  return n * 1024ULL;
}
Expand Down Expand Up @@ -8190,6 +8195,25 @@ std::vector<Option> get_mds_options() {
.set_default(2.0)
.set_description("task status update interval to manager")
.set_long_description("interval (in seconds) for sending mds task status to ceph manager"),

Option("mds_worm_commit_period", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(2_hr)
.set_min_max(1_min, 30_day)
.set_description("default auto commit period for worm feature"),

Option("mds_worm_retention_period", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems these three options are all sharing the same default value. Did you do that deliberately?

Also, I've noticed that you silently changed the permissions of all the files you touched from 100644 to 100755, which definitely needs to be addressed first.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These options have been redesigned. I think `mds_worm_retention_period` could be equal to `mds_worm_min_retention_period`, with both set to "1h", while `mds_worm_max_retention_period` should be larger than `mds_worm_min_retention_period` and set to "30 years". What do you think?

The mode of the modified files has already been restored to 100644, thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might switch to the inline .set_min & .set_max helpers to perform the boundary checking, then

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed it, thanks.

.set_default(1_hr)
.set_description("default retention period for worm feature,this value must between mds_worm_min_retention_period and mds_worm_max_retention_period"),

Option("mds_worm_min_retention_period", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(1_hr)
.set_min_max(1_min, 30_year)
.set_description("min retention period for worm feature"),

Option("mds_worm_max_retention_period", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(30_year)
.set_min_max(1_hr, 70_year)
.set_description("max retention period for worm feature"),
});
}

Expand Down
2 changes: 2 additions & 0 deletions src/include/ceph_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
DEFINE_CEPH_FEATURE(59, 2, MDS_WORM)
DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // 4.19, *do not share this bit*

Expand Down Expand Up @@ -244,6 +245,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_CEPHX_V2 | \
CEPH_FEATURE_OSD_PGLOG_HARDLIMIT | \
CEPH_FEATUREMASK_SERVER_OCTOPUS | \
CEPH_FEATURE_MDS_WORM | \
0ULL)

#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
Expand Down
5 changes: 5 additions & 0 deletions src/include/ceph_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ extern const char *ceph_con_mode_name(int con_mode);
#define CEPH_MSG_CLIENT_SNAP 0x312
#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
#define CEPH_MSG_CLIENT_QUOTA 0x314
#define CEPH_MSG_CLIENT_WORM 0x315

/* pool ops */
#define CEPH_MSG_POOLOP_REPLY 48
Expand Down Expand Up @@ -432,6 +433,8 @@ int ceph_flags_sys2wire(int flags);
#define CEPH_XATTR_REPLACE (1 << 1)
#define CEPH_XATTR_REMOVE (1 << 31)

#define CEPH_XATTR_WANT_SET_WORM_CAPS 1

/*
* readdir request flags;
*/
Expand Down Expand Up @@ -486,6 +489,7 @@ union ceph_mds_request_args_legacy {
struct {
__le32 flags;
__le32 osdmap_epoch; /* use for set file/dir layout */
__le32 set_worm_attr; /* use for set worm attr */
} __attribute__ ((packed)) setxattr;
struct {
struct ceph_file_layout layout;
Expand Down Expand Up @@ -562,6 +566,7 @@ union ceph_mds_request_args {
struct {
__le32 flags;
__le32 osdmap_epoch; /* use for set file/dir layout */
__le32 set_worm_attr; /* use for set worm attr */
} __attribute__ ((packed)) setxattr;
struct {
struct ceph_file_layout layout;
Expand Down
8 changes: 7 additions & 1 deletion src/mds/CInode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3826,7 +3826,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
* note: encoding matches MClientReply::InodeStat
*/
if (session->info.has_feature(CEPHFS_FEATURE_REPLY_ENCODING)) {
ENCODE_START(3, 1, bl);
ENCODE_START(4, 1, bl);
encode(oi->ino, bl);
encode(snapid, bl);
encode(oi->rdev, bl);
Expand Down Expand Up @@ -3869,6 +3869,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
encode(any_i->change_attr, bl);
encode(file_i->export_pin, bl);
encode(snap_btime, bl);
encode(policy_i->worm, bl);
ENCODE_FINISH(bl);
}
else {
Expand Down Expand Up @@ -3925,6 +3926,11 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
encode(any_i->btime, bl);
encode(any_i->change_attr, bl);
}

if (session->get_connection()->has_feature(CEPH_FEATURE_MDS_WORM)){
mempool_inode *policy_i = ppolicy ? pi : oi;
encode(policy_i->worm, bl);
}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs to bump encoding version

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed it, thanks.

return valid;
Expand Down
22 changes: 22 additions & 0 deletions src/mds/MDCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2028,6 +2028,28 @@ void MDCache::broadcast_quota_to_client(CInode *in, client_t exclude_ct, bool qu
}
}

/*
 * Send an MClientWorm message carrying the projected inode's worm info to
 * the clients listed in in->client_caps.
 *
 * Nothing is sent when in->is_auth() is false or in->is_frozen() is true.
 * Clients without a session or connection, or whose connection lacks
 * CEPH_FEATURE_MDS_WORM, are skipped.
 *
 * @param in          inode whose worm info is broadcast
 * @param exclude_ct  client to leave out of the broadcast; a negative value
 *                    excludes nobody
 */
void MDCache::broadcast_worm_to_client(CInode *in, client_t exclude_ct)
{
  if (!in->is_auth() || in->is_frozen())
    return;

  auto i = in->get_projected_inode();

  for (auto &p : in->client_caps) {
    Session *session = mds->get_session(p.first);
    if (!session || !session->get_connection() ||
        !session->get_connection()->has_feature(CEPH_FEATURE_MDS_WORM))
      continue;

    // Skip only the excluded client.  The previous condition
    // (exclude_ct >= 0 && exclude_ct != p.first) guarded the send itself,
    // so a negative exclude_ct (meaning "exclude nobody") sent nothing at all.
    if (exclude_ct >= 0 && exclude_ct == p.first)
      continue;

    auto msg = make_message<MClientWorm>();
    msg->ino = in->ino();
    msg->worm = i->worm;
    mds->send_message_client_counted(msg, session->get_connection());
  }
}

/*
* NOTE: we _have_ to delay the scatter if we are called during a
* rejoin, because we can't twiddle locks between when the
Expand Down
3 changes: 2 additions & 1 deletion src/mds/MDCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
#include "messages/MMDSResolveAck.h"
#include "messages/MMDSSlaveRequest.h"
#include "messages/MMDSSnapUpdate.h"

#include "messages/MClientWorm.h"

#include "osdc/Filer.h"
#include "CInode.h"
Expand Down Expand Up @@ -454,6 +454,7 @@ class MDCache {
snapid_t ofirst, snapid_t last,
CInode *pin, bool cow_head);
void broadcast_quota_to_client(CInode *in, client_t exclude_ct = -1, bool quota_change = false);
void broadcast_worm_to_client(CInode *in, client_t exclude_ct = -1);
void predirty_journal_parents(MutationRef mut, EMetaBlob *blob,
CInode *in, CDir *parent,
int flags, int linkunlink=0,
Expand Down
13 changes: 13 additions & 0 deletions src/mds/MDSAuthCaps.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ struct MDSCapParser : qi::grammar<Iterator, MDSAuthCaps()>
(lit("rw"))[_val = MDSCapSpec(MDSCapSpec::RW)]
|
(lit("r"))[_val = MDSCapSpec(MDSCapSpec::READ)]
|
(lit("rwpi"))[_val = MDSCapSpec(MDSCapSpec::RWPI)]
|
(lit("rwpsi"))[_val = MDSCapSpec(MDSCapSpec::RWPSI)]
);

grant = lit("allow") >> (capspec >> match >>
Expand Down Expand Up @@ -246,6 +250,12 @@ bool MDSAuthCaps::is_capable(std::string_view inode_path,
}
}

if (mask & MAY_SET_WORM) {
if (!grant.spec.allow_set_worm()) {
continue;
}
}

if (mask & MAY_SNAPSHOT) {
if (!grant.spec.allow_snapshot()) {
continue;
Expand Down Expand Up @@ -396,6 +406,9 @@ ostream &operator<<(ostream &out, const MDSCapSpec &spec)
if (spec.allow_snapshot()) {
out << "s";
}
if (spec.allow_set_worm()) {
out << "i";
}
}

return out;
Expand Down
10 changes: 9 additions & 1 deletion src/mds/MDSAuthCaps.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ enum {
MAY_CHGRP = (1 << 5),
MAY_SET_VXATTR = (1 << 6),
MAY_SNAPSHOT = (1 << 7),
MAY_SET_WORM = (1 << 8),
};

class CephContext;
Expand All @@ -46,16 +47,20 @@ struct MDSCapSpec {
static const unsigned SET_VXATTR = (1 << 3);
// if the capability permits mksnap/rmsnap
static const unsigned SNAPSHOT = (1 << 4);
// if the capability permits setting worm attr
static const unsigned SET_WORM = (1 << 5);

static const unsigned RW = (READ|WRITE);
static const unsigned RWP = (READ|WRITE|SET_VXATTR);
static const unsigned RWS = (READ|WRITE|SNAPSHOT);
static const unsigned RWPS = (READ|WRITE|SET_VXATTR|SNAPSHOT);
static const unsigned RWPI = (READ|WRITE|SET_VXATTR|SET_WORM);
static const unsigned RWPSI = (READ|WRITE|SET_VXATTR|SNAPSHOT|SET_WORM);

MDSCapSpec() = default;
MDSCapSpec(unsigned _caps) : caps(_caps) {
if (caps & ALL)
caps |= RWPS;
caps |= RWPSI;
}

bool allow_all() const {
Expand Down Expand Up @@ -84,6 +89,9 @@ struct MDSCapSpec {
// True when the SET_VXATTR bit is set in caps.
bool allow_set_vxattr() const {
  const unsigned masked = caps & SET_VXATTR;
  return masked != 0;
}
// True when the SET_WORM bit is set in caps.
bool allow_set_worm() const {
  const unsigned masked = caps & SET_WORM;
  return masked != 0;
}
private:
unsigned caps = 0;
};
Expand Down