Skip to content

Commit

Permalink
os/chain_xattr: stripe shortish xattrs over small chunks for XFS
Browse files Browse the repository at this point in the history
XFS has a hard limit of 255 (or 254?) bytes for xattrs to be inlined in the
inode due to a single byte for the length in the on-disk format.  If we
have an xattr that is a bit bigger than that it will get kicked out into a
separate extent, but if we stripe it across a few shorter xattrs it can
be inlined when the xfs inode is e.g. 2K.

Adjust the chain_xattr logic to capture this case.  Note that we are doing
this unconditionally regardless of fs type, but that is probably okay
given that most users use XFS and the cost isn't huge.

Reported-by: Ning Yao <yaoning@ruijie.com.cn>
Signed-off-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Mar 18, 2015
1 parent 5901a5d commit 7dc5cf4
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 12 deletions.
43 changes: 31 additions & 12 deletions src/os/chain_xattr.cc
Expand Up @@ -116,7 +116,8 @@ static int getxattr_len(const char *fn, const char *name)
break;
total += r;
i++;
} while (r == CHAIN_XATTR_MAX_BLOCK_LEN);
} while (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
r == CHAIN_XATTR_SHORT_BLOCK_LEN);

return total;
}
Expand All @@ -135,7 +136,6 @@ int chain_getxattr(const char *fn, const char *name, void *val, size_t size)
do {
chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
size -= chunk_size;

r = sys_getxattr(fn, raw_name, (char *)val + pos, chunk_size);
if (i && r == -ENODATA) {
Expand All @@ -147,17 +147,21 @@ int chain_getxattr(const char *fn, const char *name, void *val, size_t size)
break;
}

if (r > 0)
if (r > 0) {
pos += r;
size -= r;
}

i++;
} while (size && r == CHAIN_XATTR_MAX_BLOCK_LEN);
} while (size && (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
r == CHAIN_XATTR_SHORT_BLOCK_LEN));

if (r >= 0) {
ret = pos;
/* is there another chunk? that can happen if the last read size span over
exactly one block */
if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN) {
if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN ||
chunk_size == CHAIN_XATTR_SHORT_BLOCK_LEN) {
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
r = sys_getxattr(fn, raw_name, 0, 0);
if (r > 0) { // there's another chunk.. the original buffer was too small
Expand All @@ -183,7 +187,8 @@ static int chain_fgetxattr_len(int fd, const char *name)
break;
total += r;
i++;
} while (r == CHAIN_XATTR_MAX_BLOCK_LEN);
} while (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
r == CHAIN_XATTR_SHORT_BLOCK_LEN);

return total;
}
Expand All @@ -202,7 +207,6 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)
do {
chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
size -= chunk_size;

r = sys_fgetxattr(fd, raw_name, (char *)val + pos, chunk_size);
if (i && r == -ENODATA) {
Expand All @@ -214,17 +218,21 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)
break;
}

if (r > 0)
if (r > 0) {
pos += r;
size -= r;
}

i++;
} while (size && r == CHAIN_XATTR_MAX_BLOCK_LEN);
} while (size && (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
r == CHAIN_XATTR_SHORT_BLOCK_LEN));

if (r >= 0) {
ret = pos;
/* is there another chunk? that can happen if the last read size span over
exactly one block */
if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN) {
if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN ||
chunk_size == CHAIN_XATTR_SHORT_BLOCK_LEN) {
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
r = sys_fgetxattr(fd, raw_name, 0, 0);
if (r > 0) { // there's another chunk.. the original buffer was too small
Expand All @@ -238,14 +246,24 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)

// setxattr

static int get_xattr_block_size(size_t size)
{
if (size < CHAIN_XATTR_SHORT_LEN_THRESHOLD)
// this may fit in the inode; stripe over short attrs so that XFS
// won't kick it out.
return CHAIN_XATTR_SHORT_BLOCK_LEN;
return CHAIN_XATTR_MAX_BLOCK_LEN;
}

int chain_setxattr(const char *fn, const char *name, const void *val, size_t size)
{
int i = 0, pos = 0;
char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
int ret = 0;
size_t max_chunk_size = get_xattr_block_size(size);

do {
size_t chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
size -= chunk_size;

Expand Down Expand Up @@ -278,9 +296,10 @@ int chain_fsetxattr(int fd, const char *name, const void *val, size_t size)
int i = 0, pos = 0;
char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
int ret = 0;
size_t max_chunk_size = get_xattr_block_size(size);

do {
size_t chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
size -= chunk_size;

Expand Down
6 changes: 6 additions & 0 deletions src/os/chain_xattr.h
Expand Up @@ -11,6 +11,12 @@
#define CHAIN_XATTR_MAX_NAME_LEN 128
#define CHAIN_XATTR_MAX_BLOCK_LEN 2048

/*
* XFS will only inline xattrs < 255 bytes, so for xattrs that are
* likely to fit in the inode, stripe over short xattrs.
*/
#define CHAIN_XATTR_SHORT_BLOCK_LEN 250
#define CHAIN_XATTR_SHORT_LEN_THRESHOLD 1800

// wrappers to hide annoying errno handling.

Expand Down
34 changes: 34 additions & 0 deletions src/test/objectstore/chain_xattr.cc
Expand Up @@ -182,6 +182,40 @@ TEST(chain_xattr, chunk_aligned) {
ASSERT_EQ(0, chain_fremovexattr(fd, name2.c_str()));
}

for (int len = CHAIN_XATTR_SHORT_BLOCK_LEN - 10;
len < CHAIN_XATTR_SHORT_BLOCK_LEN + 10;
++len) {
cout << len << std::endl;
const string x(len, 'x');
char buf[len*2];
ASSERT_EQ(len, chain_setxattr(file, name.c_str(), x.c_str(), len));
char attrbuf[4096];
int l = ceph_os_listxattr(file, attrbuf, sizeof(attrbuf));
for (char *p = attrbuf; p - attrbuf < l; p += strlen(p) + 1) {
cout << " attr " << p << std::endl;
}
ASSERT_EQ(len, chain_getxattr(file, name.c_str(), buf, len*2));
}

{
// test tail path in chain_getxattr
const char *aname = "user.baz";
char buf[CHAIN_XATTR_SHORT_BLOCK_LEN*3];
memset(buf, 'x', sizeof(buf));
ASSERT_EQ(sizeof(buf), chain_setxattr(file, aname, buf, sizeof(buf)));
ASSERT_EQ(-ERANGE, chain_getxattr(file, aname, buf,
CHAIN_XATTR_SHORT_BLOCK_LEN*2));
}
{
// test tail path in chain_fgetxattr
const char *aname = "user.biz";
char buf[CHAIN_XATTR_SHORT_BLOCK_LEN*3];
memset(buf, 'x', sizeof(buf));
ASSERT_EQ(sizeof(buf), chain_fsetxattr(fd, aname, buf, sizeof(buf)));
ASSERT_EQ(-ERANGE, chain_fgetxattr(fd, aname, buf,
CHAIN_XATTR_SHORT_BLOCK_LEN*2));
}

::close(fd);
::unlink(file);
}
Expand Down

0 comments on commit 7dc5cf4

Please sign in to comment.