Skip to content

Commit

Permalink
Fix mmap / libaio deadlock
Browse files Browse the repository at this point in the history
Calling uiomove() in mappedread() can result in deadlock if the
user space page needs to be faulted in.

The issue is that uiomove() must be called with the page lock held
in order to safely populate the page data.  If the page needs to be
faulted in by filemap_page_mkwrite() then it will also take the page
lock resulting in a double-lock.

Normally this isn't an issue since the pages are very likely to be
already faulted in.  This patch makes sure that is always the case
by prefaulting in the user space pages.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue openzfs#7335
  • Loading branch information
behlendorf committed Mar 26, 2018
1 parent 9ea6c3d commit 4b0dafa
Show file tree
Hide file tree
Showing 16 changed files with 184 additions and 75 deletions.
14 changes: 14 additions & 0 deletions config/user-libaio.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
dnl #
dnl # Check for libaio - only used for the libaio test cases.
dnl #
dnl # When the libaio.h header is found: user_libaio is set to "yes",
dnl # LIBAIO is substituted as "-laio" and HAVE_LIBAIO is defined.
dnl # Otherwise user_libaio is set to "no" and LIBAIO is left empty.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_USER_LIBAIO], [
LIBAIO=
AC_CHECK_HEADER([libaio.h], [
user_libaio=yes
AC_SUBST([LIBAIO], ["-laio"])
AC_DEFINE([HAVE_LIBAIO], 1, [Define if you have libaio])
], [
user_libaio=no
])
])
1 change: 1 addition & 0 deletions config/user.m4
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [
ZFS_AC_CONFIG_USER_LIBATTR
ZFS_AC_CONFIG_USER_LIBUDEV
ZFS_AC_CONFIG_USER_LIBSSL
ZFS_AC_CONFIG_USER_LIBAIO
ZFS_AC_CONFIG_USER_RUNSTATEDIR
ZFS_AC_CONFIG_USER_MAKEDEV_IN_SYSMACROS
ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV
Expand Down
4 changes: 2 additions & 2 deletions config/zfs-build.m4
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,11 @@ AC_DEFUN([ZFS_AC_CONFIG], [
AM_CONDITIONAL([CONFIG_KERNEL],
[test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] &&
[test "x$enable_linux_builtin" != xyes ])
AM_CONDITIONAL([WANT_DEVNAME2DEVID],
[test "x$user_libudev" = xyes ])
AM_CONDITIONAL([CONFIG_QAT],
[test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] &&
[test "x$qatsrc" != x ])
AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ])
AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ])
])

dnl #
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/cmd/mkfiles/Makefile
tests/zfs-tests/cmd/mktree/Makefile
tests/zfs-tests/cmd/mmap_exec/Makefile
tests/zfs-tests/cmd/mmap_libaio/Makefile
tests/zfs-tests/cmd/mmapwrite/Makefile
tests/zfs-tests/cmd/nvlist_to_lua/Makefile
tests/zfs-tests/cmd/randfree_file/Makefile
Expand Down
1 change: 0 additions & 1 deletion include/sys/uio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
#include <sys/uio.h>

extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
extern void uio_prefaultpages(ssize_t, uio_t *);
extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
extern void uioskip(uio_t *, size_t);

Expand Down
61 changes: 0 additions & 61 deletions module/zcommon/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,67 +148,6 @@ uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
}
EXPORT_SYMBOL(uiomove);

/* Thin wrapper around get_user(); a non-zero result is treated as failure. */
#define fuword8(uptr, vptr) get_user((*vptr), (uptr))

/*
* Fault in the pages of the first n bytes specified by the uio structure.
* 1 byte in each page is touched and the uio struct is unmodified. Any
* error ends the prefaulting early (the function simply returns), as this
* is only a best attempt to get the pages resident.
*
* n   - number of bytes, starting at the current uio position, to prefault
* uio - describes the iovec array; only read here, never advanced
*/
void
uio_prefaultpages(ssize_t n, struct uio *uio)
{
const struct iovec *iov;
ulong_t cnt, incr;
caddr_t p;
uint8_t tmp;
int iovcnt;
size_t skip;

/* no need to fault in kernel pages */
switch (uio->uio_segflg) {
case UIO_SYSSPACE:
case UIO_BVEC:
return;
case UIO_USERSPACE:
case UIO_USERISPACE:
break;
default:
/*
* Unexpected segment type.
* NOTE(review): if ASSERT() compiles out, execution continues
* into the loop below - confirm that is acceptable.
*/
ASSERT(0);
}

/* Work on local copies so the caller's uio is left untouched. */
iov = uio->uio_iov;
iovcnt = uio->uio_iovcnt;
skip = uio->uio_skip;

/* uio_skip only applies to the first iovec; it is reset to 0 afterwards. */
for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
cnt = MIN(iov->iov_len - skip, n);
/* empty iov */
if (cnt == 0)
continue;
n -= cnt;
/*
* touch each page in this segment.
*/
p = iov->iov_base + skip;
while (cnt) {
if (fuword8((uint8_t *)p, &tmp))
return;
/* Step by PAGESIZE from p, which need not be page aligned. */
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
}
/*
* touch the last byte in case it straddles a page.
*/
p--;
if (fuword8((uint8_t *)p, &tmp))
return;
}
}
EXPORT_SYMBOL(uio_prefaultpages);

/*
* same as uiomove() but doesn't modify uio structure.
* return in cbytes how many bytes were copied.
Expand Down
10 changes: 1 addition & 9 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,8 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
bytes = MIN(PAGE_SIZE - off, len);

pp = find_lock_page(mp, start >> PAGE_SHIFT);
pp = find_get_page(mp, start >> PAGE_SHIFT);
if (pp) {
ASSERT(PageUptodate(pp));

pb = kmap(pp);
error = uiomove(pb + off, bytes, UIO_READ, uio);
kunmap(pp);
Expand All @@ -409,7 +407,6 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
flush_dcache_page(pp);

mark_page_accessed(pp);
unlock_page(pp);
put_page(pp);
} else {
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
Expand Down Expand Up @@ -675,9 +672,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
if ((uio->uio_extflg == UIO_XUIO) &&
(((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
xuio = (xuio_t *)uio;
else
#endif
uio_prefaultpages(MIN(n, max_blksz), uio);

/*
* If in append mode, set the io offset pointer to eof.
Expand Down Expand Up @@ -927,9 +922,6 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
break;
ASSERT(tx_bytes == nbytes);
n -= nbytes;

if (!xuio && n > 0)
uio_prefaultpages(MIN(n, max_blksz), uio);
}

zfs_inode_update(zp);
Expand Down
1 change: 1 addition & 0 deletions rpm/generic/zfs.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ Requires: fio
Requires: acl
Requires: sudo
Requires: sysstat
Requires: libaio
AutoReqProv: no

%description test
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
tags = ['functional', 'migration']

[tests/functional/mmap]
tests = ['mmap_write_001_pos', 'mmap_read_001_pos']
tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_libaio_001_pos']
tags = ['functional', 'mmap']

[tests/functional/mmp]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/cmd/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ SUBDIRS = \
mkfiles \
mktree \
mmap_exec \
mmap_libaio \
mmapwrite \
nvlist_to_lua \
randfree_file \
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/cmd/mmap_libaio/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/mmap_libaio
9 changes: 9 additions & 0 deletions tests/zfs-tests/cmd/mmap_libaio/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
include $(top_srcdir)/config/Rules.am

pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin

if WANT_MMAP_LIBAIO
pkgexec_PROGRAMS = mmap_libaio
mmap_libaio_SOURCES = mmap_libaio.c
mmap_libaio_LDADD = $(LIBAIO)
endif
88 changes: 88 additions & 0 deletions tests/zfs-tests/cmd/mmap_libaio/mmap_libaio.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright 2018 Canonical. All rights reserved.
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <libaio.h>
#include <err.h>

/* AIO context; set up and released by main(), used by do_sync_io(). */
io_context_t io_ctx;

/*
 * Submit one prepared iocb on io_ctx and wait for its completion event.
 *
 * The wait is bounded by a 30 second timeout.  Any failure is fatal: a
 * diagnostic is printed and the process exits with status 1.
 *
 * libaio calls report failure through a negative return code instead of
 * errno, so the returned value is printed rather than relying on err().
 */
static void
do_sync_io(struct iocb *iocb)
{
	struct io_event event;
	struct iocb *iocbs[] = { iocb };
	struct timespec ts = { 30, 0 };
	int rc;

	rc = io_submit(io_ctx, 1, iocbs);
	if (rc != 1)
		errx(1, "io_submit failed: %d", rc);

	/*
	 * Require at least one event.  With a minimum of 0 the call may
	 * return immediately with nothing, which makes the timeout useless
	 * and turns a slow completion into a spurious failure.
	 */
	rc = io_getevents(io_ctx, 1, 1, &event, &ts);
	if (rc != 1)
		errx(1, "io_getevents failed: %d", rc);
}

/*
 * Issue an AIO write followed by an AIO read on a file, using a shared
 * mapping of that same file as the I/O buffer.
 *
 * Usage: mmap_libaio <file> <size>
 *
 *   file - path of the file to create/open and truncate to <size>
 *   size - number of bytes to transfer (parsed by strtol() with base 0)
 *
 * Returns 0 on success; every failure exits with status 1.
 */
int
main(int argc, char **argv)
{
	char *buf;
	char *end;
	int page_size = getpagesize();
	long size;
	int buf_size;
	int rwfd;
	int rc;
	struct iocb iocb;

	/* Both positional arguments are dereferenced below. */
	if (argc < 3)
		errx(1, "usage: mmap_libaio <file> <size>");

	/* Reject non-numeric, negative or int-overflowing sizes. */
	size = strtol(argv[2], &end, 0);
	if (end == argv[2] || *end != '\0' || size < 0 || size != (int)size)
		errx(1, "invalid size: %s", argv[2]);
	buf_size = (int)size;

	/* libaio returns a negative error code instead of setting errno. */
	rc = io_queue_init(1024, &io_ctx);
	if (rc != 0)
		errx(1, "io_queue_init failed: %d", rc);

	rwfd = open(argv[1], O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (rwfd < 0)
		err(1, "open failed");

	if (ftruncate(rwfd, buf_size) < 0)
		err(1, "ftruncate failed");

	/*
	 * NOTE(review): only one page is mapped while buf_size bytes are
	 * transferred; for buf_size > page_size the I/O extends past this
	 * mapping - confirm that is intended for the test.
	 */
	buf = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rwfd, 0);
	if (buf == MAP_FAILED)
		err(1, "mmap failed");

	/* Write the file from its own mapping, then read it back into it. */
	(void) io_prep_pwrite(&iocb, rwfd, buf, buf_size, 0);
	do_sync_io(&iocb);

	(void) io_prep_pread(&iocb, rwfd, buf, buf_size, 0);
	do_sync_io(&iocb);

	if (close(rwfd))
		err(1, "close failed");

	rc = io_queue_release(io_ctx);
	if (rc != 0)
		errx(1, "io_queue_release failed: %d", rc);

	return (0);
}
1 change: 1 addition & 0 deletions tests/zfs-tests/include/commands.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ export ZFSTEST_FILES='chg_usr_exec
mkfiles
mktree
mmap_exec
mmap_libaio
mmapwrite
nvlist_to_lua
randfree_file
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/mmap/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
mmap.cfg \
mmap_read_001_pos.ksh \
mmap_write_001_pos.ksh
mmap_write_001_pos.ksh \
mmap_libaio_001_pos.ksh
61 changes: 61 additions & 0 deletions tests/zfs-tests/tests/functional/mmap/mmap_libaio_001_pos.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2018 Canonical. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/mmap/mmap.cfg

#
# DESCRIPTION:
# Verify libaio functions correctly with mmap()'d files.
#
# STRATEGY:
# 1. Call mmap_libaio binary
# 2. Verify the file exists and is the expected size
# 3. Verify the filesystem is intact and not hung in any way
#

verify_runnable "global"

log_assert "verify mmap'd pages work with libaio"

# mmap_libaio is built when the libaio-devel package is installed.
# Without the binary in PATH the test is reported as unsupported.
if ! which mmap_libaio; then
log_unsupported "This test requires mmap_libaio."
fi

log_must chmod 777 $TESTDIR

# Run once per buffer size: below, equal to and above 4096 bytes.
for size in 512 4096 8192; do
# The file must not exist yet; mmap_libaio is expected to create it.
log_mustnot stat $TESTDIR/test-libaio-file
log_must mmap_libaio $TESTDIR/test-libaio-file $size
# The resulting file size must match the requested size.
log_must verify_eq $(stat --format=%s $TESTDIR/test-libaio-file) $size
log_must rm $TESTDIR/test-libaio-file
done

# Step 3 of the strategy: check the filesystem after the AIO runs.
# NOTE(review): exact checks depend on verify_filesys from the test library.
typeset dir=$(get_device_dir $DISKS)
verify_filesys "$TESTPOOL" "$TESTPOOL/$TESTFS" "$dir"

log_pass "mmap'd pages work with libaio"

0 comments on commit 4b0dafa

Please sign in to comment.