Skip to content

Commit

Permalink
mds: add ceph.dir.bal.mask vxattr for MDS Balancer
Browse files Browse the repository at this point in the history
Signed-off-by: Yongseok Oh <yongseok.oh@linecorp.com>
  • Loading branch information
yongseokoh committed Jul 10, 2023
1 parent 0447bb3 commit ab04b9d
Show file tree
Hide file tree
Showing 15 changed files with 314 additions and 27 deletions.
12 changes: 10 additions & 2 deletions src/include/cephfs/types.h
Expand Up @@ -65,6 +65,7 @@ typedef int32_t mds_rank_t;
// Special negative values of mds_rank_t.
constexpr mds_rank_t MDS_RANK_NONE = -1;
constexpr mds_rank_t MDS_RANK_EPHEMERAL_DIST = -2;
constexpr mds_rank_t MDS_RANK_EPHEMERAL_RAND = -3;
// Substituted for MDS_RANK_NONE by CInode::maybe_export_pin() before the
// inode is queued, and passed through unchanged as the target by
// CInode::queue_export_pin().
// NOTE(review): presumably tells the consumer to apply bal_rank_mask
// instead of a fixed rank -- confirm against the code that drains the queue.
constexpr mds_rank_t MDS_RANK_MASK = -4;

struct scatter_info_t {
version_t version = 0;
Expand Down Expand Up @@ -527,6 +528,7 @@ struct inode_t {

double export_ephemeral_random_pin = 0;
bool export_ephemeral_distributed_pin = false;
std::string bal_rank_mask;

// special stuff
version_t version = 0; // auth only
Expand Down Expand Up @@ -555,7 +557,7 @@ struct inode_t {
template<template<typename> class Allocator>
void inode_t<Allocator>::encode(ceph::buffer::list &bl, uint64_t features) const
{
ENCODE_START(19, 6, bl);
ENCODE_START(20, 6, bl);

encode(ino, bl);
encode(rdev, bl);
Expand Down Expand Up @@ -614,13 +616,14 @@ void inode_t<Allocator>::encode(ceph::buffer::list &bl, uint64_t features) const
encode(fscrypt_auth, bl);
encode(fscrypt_file, bl);
encode(fscrypt_last_block, bl);
encode(bal_rank_mask, bl);
ENCODE_FINISH(bl);
}

template<template<typename> class Allocator>
void inode_t<Allocator>::decode(ceph::buffer::list::const_iterator &p)
{
DECODE_START_LEGACY_COMPAT_LEN(19, 6, 6, p);
DECODE_START_LEGACY_COMPAT_LEN(20, 6, 6, p);

decode(ino, p);
decode(rdev, p);
Expand Down Expand Up @@ -731,6 +734,10 @@ void inode_t<Allocator>::decode(ceph::buffer::list::const_iterator &p)
if (struct_v >= 19) {
decode(fscrypt_last_block, p);
}

if (struct_v >= 20) {
decode(bal_rank_mask, p);
}
DECODE_FINISH(p);
}

Expand Down Expand Up @@ -770,6 +777,7 @@ void inode_t<Allocator>::dump(ceph::Formatter *f) const
f->dump_int("export_pin", export_pin);
f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin);
f->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin);
f->dump_stream("bal_rank_mask") << bal_rank_mask;

f->open_array_section("client_ranges");
for (const auto &p : client_ranges) {
Expand Down
5 changes: 5 additions & 0 deletions src/mds/CDir.cc
Expand Up @@ -2855,6 +2855,11 @@ mds_rank_t CDir::get_export_pin(bool inherit) const
return export_pin;
}

std::string CDir::get_rank_mask(bool inherit) const
{
return inode->get_rank_mask(inherit);
}

bool CDir::is_exportable(mds_rank_t dest) const
{
mds_rank_t export_pin = get_export_pin();
Expand Down
1 change: 1 addition & 0 deletions src/mds/CDir.h
Expand Up @@ -515,6 +515,7 @@ class CDir : public MDSCacheObject, public Counter<CDir> {

// -- import/export --
mds_rank_t get_export_pin(bool inherit=true) const;
std::string get_rank_mask(bool inherit=true) const;
bool is_exportable(mds_rank_t dest) const;

void encode_export(ceph::buffer::list& bl);
Expand Down
59 changes: 57 additions & 2 deletions src/mds/CInode.cc
Expand Up @@ -491,6 +491,8 @@ void CInode::pop_and_dirty_projected_inode(LogSegment *ls, const MutationRef& mu
bool pin_updated = (get_inode()->export_pin != front.inode->export_pin) ||
(get_inode()->export_ephemeral_distributed_pin !=
front.inode->export_ephemeral_distributed_pin);
bool bal_rank_mask_updated = get_inode()->bal_rank_mask !=
front.inode->bal_rank_mask;

reset_inode(std::move(front.inode));
if (front.xattrs != get_xattrs())
Expand All @@ -505,7 +507,7 @@ void CInode::pop_and_dirty_projected_inode(LogSegment *ls, const MutationRef& mu
if (get_inode()->is_backtrace_updated())
mark_dirty_parent(ls, pool_updated);

if (pin_updated)
if (pin_updated || bal_rank_mask_updated)
maybe_export_pin(true);
}

Expand Down Expand Up @@ -2127,6 +2129,7 @@ void CInode::encode_lock_ipolicy(bufferlist& bl)
encode(get_inode()->export_pin, bl);
encode(get_inode()->export_ephemeral_distributed_pin, bl);
encode(get_inode()->export_ephemeral_random_pin, bl);
encode(get_inode()->bal_rank_mask, bl);
}
ENCODE_FINISH(bl);
}
Expand All @@ -2148,13 +2151,16 @@ void CInode::decode_lock_ipolicy(bufferlist::const_iterator& p)
if (struct_v >= 2) {
decode(_inode->export_ephemeral_distributed_pin, p);
decode(_inode->export_ephemeral_random_pin, p);
decode(_inode->bal_rank_mask, p);
}
}
DECODE_FINISH(p);

bool pin_updated = (get_inode()->export_pin != _inode->export_pin) ||
(get_inode()->export_ephemeral_distributed_pin !=
_inode->export_ephemeral_distributed_pin);
bool bal_rank_mask_updated = get_inode()->bal_rank_mask!= _inode->bal_rank_mask;
pin_updated |= bal_rank_mask_updated;
reset_inode(std::move(_inode));
maybe_export_pin(pin_updated);
}
Expand Down Expand Up @@ -5247,6 +5253,8 @@ void CInode::queue_export_pin(mds_rank_t export_pin)
target = export_pin;
else if (export_pin == MDS_RANK_EPHEMERAL_RAND)
target = mdcache->hash_into_rank_bucket(ino());
else if (export_pin == MDS_RANK_MASK)
target = MDS_RANK_MASK;
else
target = MDS_RANK_NONE;

Expand Down Expand Up @@ -5300,8 +5308,17 @@ void CInode::maybe_export_pin(bool update)
dout(15) << __func__ << " update=" << update << " " << *this << dendl;

mds_rank_t export_pin = get_export_pin(false);
if (export_pin == MDS_RANK_NONE && !update)
if (export_pin == MDS_RANK_NONE && !update) {
return;
}

if (export_pin == MDS_RANK_NONE) {
std::string bal_rank_mask = get_rank_mask(false);
if (bal_rank_mask.size() == 0 && !update) {
return;
}
export_pin = MDS_RANK_MASK;
}

check_pin_policy(export_pin);
queue_export_pin(export_pin);
Expand Down Expand Up @@ -5398,6 +5415,44 @@ void CInode::setxattr_ephemeral_dist(bool val)
_get_projected_inode()->export_ephemeral_distributed_pin = val;
}

// Store `val` as the bal_rank_mask of the projected inode.
// Only valid on directories (asserted).
//
// `val` is a by-value sink parameter, so it is moved into place rather
// than copied a second time.
void CInode::setxattr_bal_rank_mask(std::string val)
{
  ceph_assert(is_dir());
  _get_projected_inode()->bal_rank_mask = std::move(val);
}

// Look up the balancer rank mask (inode_t::bal_rank_mask) that applies to
// this inode.
//
// @param inherit  when true, walk up through the parent directories until an
//                 inode with a non-empty mask is found; when false, only
//                 this inode's own mask is considered.
// @return the first non-empty mask found. An empty string is returned when
//         the mds_bal_export_pin option is off, or when the walk ends
//         without finding a mask: at a system inode, at an inode without a
//         parent dentry, or at an unlinked inode (nlink == 0).
std::string CInode::get_rank_mask(bool inherit) const
{
  // NOTE(review): the option name refers to export pins; confirm it is
  // meant to gate the rank mask as well.
  if (!g_conf()->mds_bal_export_pin)
    return {};

  const CInode *in = this;
  while (true) {
    if (in->is_system())
      break;
    const CDentry *pdn = in->get_parent_dn();
    if (!pdn)
      break;
    if (in->get_inode()->nlink == 0) {
      // ignore the rank mask of an unlinked directory
      break;
    }

    // Bind by reference so that ancestors with an empty mask cost no string
    // copy; the only copy made is the one returned to the caller.
    const std::string &bal_rank_mask = in->get_inode()->bal_rank_mask;
    if (!bal_rank_mask.empty())
      return bal_rank_mask;

    if (!inherit)
      break;
    // step up to the inode of the directory holding our parent dentry
    in = pdn->get_dir()->inode;
  }
  return {};
}


void CInode::set_export_pin(mds_rank_t rank)
{
ceph_assert(is_dir());
Expand Down
2 changes: 2 additions & 0 deletions src/mds/CInode.h
Expand Up @@ -1002,6 +1002,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
return !projected_parent.empty();
}

std::string get_rank_mask(bool inherit=true) const;
mds_rank_t get_export_pin(bool inherit=true) const;
void check_pin_policy(mds_rank_t target);
void set_export_pin(mds_rank_t rank);
Expand All @@ -1019,6 +1020,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
double get_ephemeral_rand() const;
void maybe_ephemeral_rand(double threshold=-1.0);
void setxattr_ephemeral_rand(double prob=0.0);
void setxattr_bal_rank_mask(std::string val);
bool is_ephemeral_rand() const {
return state_test(STATE_RANDEPHEMERALPIN);
}
Expand Down

0 comments on commit ab04b9d

Please sign in to comment.