Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
ls: use statx instead of stat when available
statx allows ls to indicate interest in only certain inode metadata.
This is potentially a win on networked/clustered/distributed
file systems. In cases where we'd have to do a full, heavyweight stat()
call we can now do a much lighter statx() call.

As a real-world example, consider a file system like CephFS where one
client is actively writing to a file and another client does an
ls --color in the same directory. --color means that we need to fetch
the mode of the file.

Doing that with a stat() call means that we have to fetch the size and
mtime in addition to the mode. The MDS in that situation will have to
revoke caps in order to ensure that it has up-to-date values to report,
which disrupts the writer.

This has a measurable affect on performance. I ran a fio sequential
write test on one cephfs client and had a second client do "ls --color"
in a tight loop on the directory that held the file:

Baseline -- no activity on the second client:

WRITE: bw=76.7MiB/s (80.4MB/s), 76.7MiB/s-76.7MiB/s (80.4MB/s-80.4MB/s),
       io=4600MiB (4824MB), run=60016-60016msec

Without this patch series, we see a noticable performance hit:

WRITE: bw=70.4MiB/s (73.9MB/s), 70.4MiB/s-70.4MiB/s (73.9MB/s-73.9MB/s),
       io=4228MiB (4433MB), run=60012-60012msec

With this patch series, we gain most of that ground back:

WRITE: bw=75.9MiB/s (79.6MB/s), 75.9MiB/s-75.9MiB/s (79.6MB/s-79.6MB/s),
       io=4555MiB (4776MB), run=60019-60019msec

* src/stat.c: move statx to stat struct conversion to new header...
* src/statx.h: ...here.
* src/ls.c: Add wrapper functions for stat/lstat/fstat calls,
and add variants for when we are only interested in specific info.
Add statx-enabled functions and set the request mask based on the
output format and what values are needed.
* NEWS: Mention the Improvement.
  • Loading branch information
jtlayton authored and pixelb committed Oct 9, 2019
1 parent 86ee554 commit a99ab26
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 41 deletions.
10 changes: 7 additions & 3 deletions NEWS
Expand Up @@ -55,10 +55,14 @@ GNU coreutils NEWS -*- outline -*-
od --skip-bytes now can use lseek even if the input is not a regular
file, greatly improving performance in some cases.

stat(1) now uses the statx() system call where available, which can
stat(1) supports a new --cached= option, used on systems with statx(2)
to control cache coherency of file system attributes,
useful on network file systems.

** Improvements

stat and ls now use the statx() system call where available, which can
operate more efficiently by only retrieving requested attributes.
stat(1) also supports a new --cached= option to control cache
coherency of file system attributes, useful on network file systems.


* Noteworthy changes in release 8.31 (2019-03-10) [stable]
Expand Down
1 change: 1 addition & 0 deletions src/local.mk
Expand Up @@ -58,6 +58,7 @@ noinst_HEADERS = \
src/prog-fprintf.h \
src/remove.h \
src/set-fields.h \
src/statx.h \
src/system.h \
src/uname.h

Expand Down
145 changes: 138 additions & 7 deletions src/ls.c
Expand Up @@ -114,6 +114,7 @@
#include "xgethostname.h"
#include "c-ctype.h"
#include "canonicalize.h"
#include "statx.h"

/* Include <sys/capability.h> last to avoid a clash of <sys/types.h>
include guards with some premature versions of libcap.
Expand Down Expand Up @@ -1063,6 +1064,136 @@ dired_dump_obstack (const char *prefix, struct obstack *os)
}
}

#if HAVE_STATX && defined STATX_INO
static unsigned int _GL_ATTRIBUTE_PURE
time_type_to_statx (void)
{
switch (time_type)
{
case time_ctime:
return STATX_CTIME;
case time_mtime:
return STATX_MTIME;
case time_atime:
return STATX_ATIME;
default:
abort ();
}
return 0;
}

static unsigned int _GL_ATTRIBUTE_PURE
calc_req_mask (void)
{
unsigned int mask = STATX_MODE;

if (print_inode)
mask |= STATX_INO;

if (print_block_size)
mask |= STATX_BLOCKS;

if (format == long_format) {
mask |= STATX_NLINK | STATX_SIZE | time_type_to_statx ();
if (print_owner || print_author)
mask |= STATX_UID;
if (print_group)
mask |= STATX_GID;
}

switch (sort_type)
{
case sort_none:
case sort_name:
case sort_version:
case sort_extension:
break;
case sort_time:
mask |= time_type_to_statx ();
break;
case sort_size:
mask |= STATX_SIZE;
break;
default:
abort ();
}

return mask;
}

static int
do_statx (int fd, const char *name, struct stat *st, int flags,
unsigned int mask)
{
struct statx stx;
int ret = statx (fd, name, flags, mask, &stx);
if (ret >= 0)
statx_to_stat (&stx, st);
return ret;
}

static inline int
do_stat (const char *name, struct stat *st)
{
return do_statx (AT_FDCWD, name, st, 0, calc_req_mask ());
}

static inline int
do_lstat (const char *name, struct stat *st)
{
return do_statx (AT_FDCWD, name, st, AT_SYMLINK_NOFOLLOW, calc_req_mask ());
}

static inline int
stat_for_mode (const char *name, struct stat *st)
{
return do_statx (AT_FDCWD, name, st, 0, STATX_MODE);
}

/* dev+ino should be static, so no need to sync with backing store */
static inline int
stat_for_ino (const char *name, struct stat *st)
{
return do_statx (AT_FDCWD, name, st, 0, STATX_INO);
}

static inline int
fstat_for_ino (int fd, struct stat *st)
{
return do_statx (fd, "", st, AT_EMPTY_PATH, STATX_INO);
}
#else
static inline int
do_stat (const char *name, struct stat *st)
{
return stat (name, st);
}

static inline int
do_lstat (const char *name, struct stat *st)
{
return lstat (name, st);
}

static inline int
stat_for_mode (const char *name, struct stat *st)
{
return stat (name, st);
}

static inline int
stat_for_ino (const char *name, struct stat *st)
{
return stat (name, st);
}

static inline int
fstat_for_ino (int fd, struct stat *st)
{
return fstat (fd, st);
}
#endif

/* Return the address of the first plain %b spec in FMT, or NULL if
there is no such spec. %5b etc. do not match, so that user
widths/flags are honored. */
Expand Down Expand Up @@ -2737,10 +2868,10 @@ print_dir (char const *name, char const *realname, bool command_line_arg)
struct stat dir_stat;
int fd = dirfd (dirp);

/* If dirfd failed, endure the overhead of using stat. */
/* If dirfd failed, endure the overhead of stat'ing by path */
if ((0 <= fd
? fstat (fd, &dir_stat)
: stat (name, &dir_stat)) < 0)
? fstat_for_ino (fd, &dir_stat)
: stat_for_ino (name, &dir_stat)) < 0)
{
file_failure (command_line_arg,
_("cannot determine device and inode of %s"), name);
Expand Down Expand Up @@ -3202,7 +3333,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode,
switch (dereference)
{
case DEREF_ALWAYS:
err = stat (full_name, &f->stat);
err = do_stat (full_name, &f->stat);
do_deref = true;
break;

Expand All @@ -3211,7 +3342,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode,
if (command_line_arg)
{
bool need_lstat;
err = stat (full_name, &f->stat);
err = do_stat (full_name, &f->stat);
do_deref = true;

if (dereference == DEREF_COMMAND_LINE_ARGUMENTS)
Expand All @@ -3231,7 +3362,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode,
FALLTHROUGH;

default: /* DEREF_NEVER */
err = lstat (full_name, &f->stat);
err = do_lstat (full_name, &f->stat);
do_deref = false;
break;
}
Expand Down Expand Up @@ -3320,7 +3451,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode,
they won't be traced and when no indicator is needed. */
if (linkname
&& (file_type <= indicator_style || check_symlink_mode)
&& stat (linkname, &linkstats) == 0)
&& stat_for_mode (linkname, &linkstats) == 0)
{
f->linkok = true;
f->linkmode = linkstats.st_mode;
Expand Down
32 changes: 1 addition & 31 deletions src/stat.c
Expand Up @@ -73,6 +73,7 @@
#include "strftime.h"
#include "find-mount-point.h"
#include "xvasprintf.h"
#include "statx.h"

#if HAVE_STATX && defined STATX_INO
# define USE_STATX 1
Expand Down Expand Up @@ -1245,37 +1246,6 @@ static bool dont_sync;
static bool force_sync;

#if USE_STATX
/* Much of the format printing requires a struct stat or timespec */
static struct timespec
statx_timestamp_to_timespec (struct statx_timestamp tsx)
{
struct timespec ts;

ts.tv_sec = tsx.tv_sec;
ts.tv_nsec = tsx.tv_nsec;
return ts;
}

static void
statx_to_stat (struct statx *stx, struct stat *stat)
{
stat->st_dev = makedev (stx->stx_dev_major, stx->stx_dev_minor);
stat->st_ino = stx->stx_ino;
stat->st_mode = stx->stx_mode;
stat->st_nlink = stx->stx_nlink;
stat->st_uid = stx->stx_uid;
stat->st_gid = stx->stx_gid;
stat->st_rdev = makedev (stx->stx_rdev_major, stx->stx_rdev_minor);
stat->st_size = stx->stx_size;
stat->st_blksize = stx->stx_blksize;
/* define to avoid sc_prohibit_stat_st_blocks. */
# define SC_ST_BLOCKS st_blocks
stat->SC_ST_BLOCKS = stx->stx_blocks;
stat->st_atim = statx_timestamp_to_timespec (stx->stx_atime);
stat->st_mtim = statx_timestamp_to_timespec (stx->stx_mtime);
stat->st_ctim = statx_timestamp_to_timespec (stx->stx_ctime);
}

static unsigned int
fmt_to_mask (char fmt)
{
Expand Down
52 changes: 52 additions & 0 deletions src/statx.h
@@ -0,0 +1,52 @@
/* statx -> stat conversion functions for coreutils
Copyright (C) 2019 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */

#ifndef COREUTILS_STATX_H
# define COREUTILS_STATX_H

# if HAVE_STATX && defined STATX_INO
/* Much of the format printing requires a struct stat or timespec */
static inline struct timespec
statx_timestamp_to_timespec (struct statx_timestamp tsx)
{
struct timespec ts;

ts.tv_sec = tsx.tv_sec;
ts.tv_nsec = tsx.tv_nsec;
return ts;
}

static inline void
statx_to_stat (struct statx *stx, struct stat *stat)
{
stat->st_dev = makedev (stx->stx_dev_major, stx->stx_dev_minor);
stat->st_ino = stx->stx_ino;
stat->st_mode = stx->stx_mode;
stat->st_nlink = stx->stx_nlink;
stat->st_uid = stx->stx_uid;
stat->st_gid = stx->stx_gid;
stat->st_rdev = makedev (stx->stx_rdev_major, stx->stx_rdev_minor);
stat->st_size = stx->stx_size;
stat->st_blksize = stx->stx_blksize;
/* define to avoid sc_prohibit_stat_st_blocks. */
# define SC_ST_BLOCKS st_blocks
stat->SC_ST_BLOCKS = stx->stx_blocks;
stat->st_atim = statx_timestamp_to_timespec (stx->stx_atime);
stat->st_mtim = statx_timestamp_to_timespec (stx->stx_mtime);
stat->st_ctim = statx_timestamp_to_timespec (stx->stx_ctime);
}
# endif /* HAVE_STATX && defined STATX_INO */
#endif /* COREUTILS_STATX_H */

0 comments on commit a99ab26

Please sign in to comment.