From 36e55d97f5cd2eab3e0251bf31d166387752e0d9 Mon Sep 17 00:00:00 2001 From: Will Andrews Date: Sun, 21 Feb 2021 10:19:43 -0600 Subject: [PATCH] Add Linux namespace delegation support This allows ZFS datasets to be delegated to a user/mount namespace. Within that namespace, only the delegated datasets are visible. Works very similarly to Zones/Jails on other ZFS OSes. As a user: ``` $ unshare -Um $ zfs list no datasets available $ echo $$ 1234 ``` As root: ``` # zfs list NAME ZONED MOUNTPOINT containers off /containers containers/host off /containers/host containers/host/child off /containers/host/child containers/host/child/gchild off /containers/host/child/gchild containers/unpriv on /unpriv containers/unpriv/child on /unpriv/child containers/unpriv/child/gchild on /unpriv/child/gchild # zfs zone /proc/1234/ns/user containers/unpriv ``` Back to the user namespace: ``` $ zfs list NAME USED AVAIL REFER MOUNTPOINT containers 129M 47.8G 24K /containers containers/unpriv 128M 47.8G 24K /unpriv containers/unpriv/child 128M 47.8G 128M /unpriv/child ``` Reviewed-by: Brian Behlendorf Signed-off-by: Will Andrews Signed-off-by: Allan Jude Signed-off-by: Mateusz Piotrowski Co-authored-by: Allan Jude Co-authored-by: Mateusz Piotrowski Sponsored-by: Buddy Closes #12263 --- cmd/zfs/zfs_main.c | 60 +++ config/kernel-user-ns-inum.m4 | 23 + config/kernel.m4 | 2 + contrib/pyzfs/libzfs_core/_constants.py | 1 + include/libzfs.h | 10 + include/os/linux/spl/sys/zone.h | 31 +- include/sys/fs/zfs.h | 3 + lib/libspl/include/sys/types.h | 2 +- lib/libspl/include/zone.h | 12 +- lib/libspl/os/linux/zone.c | 32 +- lib/libuutil/libuutil.abi | 2 +- lib/libzfs/libzfs.abi | 9 +- lib/libzfs/libzfs_util.c | 6 + lib/libzfs/os/linux/libzfs_util_os.c | 69 +++ lib/libzfs_core/libzfs_core.abi | 2 +- man/Makefile.am | 2 + man/man7/zfsprops.7 | 3 +- man/man8/zfs-unzone.8 | 1 + man/man8/zfs-zone.8 | 116 +++++ module/Kbuild.in | 3 +- module/os/linux/spl/spl-generic.c | 6 + module/os/linux/spl/spl-zone.c | 424 
++++++++++++++++++ module/os/linux/zfs/policy.c | 2 +- module/os/linux/zfs/zfs_ioctl_os.c | 47 ++ module/os/linux/zfs/zfs_vfsops.c | 20 + module/os/linux/zfs/zpl_super.c | 1 + tests/runfiles/linux.run | 3 +- tests/zfs-tests/include/commands.cfg | 2 + tests/zfs-tests/tests/Makefile.am | 3 + .../user_namespace/user_namespace_001.ksh | 5 + .../user_namespace/user_namespace_002.ksh | 115 +++++ .../user_namespace/user_namespace_003.ksh | 97 ++++ .../user_namespace/user_namespace_004.ksh | 67 +++ 33 files changed, 1166 insertions(+), 15 deletions(-) create mode 100644 config/kernel-user-ns-inum.m4 create mode 120000 man/man8/zfs-unzone.8 create mode 100644 man/man8/zfs-zone.8 create mode 100644 module/os/linux/spl/spl-zone.c create mode 100755 tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh create mode 100755 tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh create mode 100755 tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 6282d894638e..30b2ae0c4b4f 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv); static int zfs_do_unjail(int argc, char **argv); #endif +#ifdef __linux__ +static int zfs_do_zone(int argc, char **argv); +static int zfs_do_unzone(int argc, char **argv); +#endif + /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. 
*/ @@ -184,6 +189,8 @@ typedef enum { HELP_JAIL, HELP_UNJAIL, HELP_WAIT, + HELP_ZONE, + HELP_UNZONE, } zfs_help_t; typedef struct zfs_command { @@ -254,6 +261,11 @@ static zfs_command_t command_table[] = { { "jail", zfs_do_jail, HELP_JAIL }, { "unjail", zfs_do_unjail, HELP_UNJAIL }, #endif + +#ifdef __linux__ + { "zone", zfs_do_zone, HELP_ZONE }, + { "unzone", zfs_do_unzone, HELP_UNZONE }, +#endif }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -415,6 +427,10 @@ get_usage(zfs_help_t idx) return (gettext("\tunjail \n")); case HELP_WAIT: return (gettext("\twait [-t ] \n")); + case HELP_ZONE: + return (gettext("\tzone \n")); + case HELP_UNZONE: + return (gettext("\tunzone \n")); default: __builtin_unreachable(); } @@ -8692,6 +8708,50 @@ main(int argc, char **argv) return (ret); } +/* + * zfs zone nsfile filesystem + * + * Add or delete the given dataset to/from the namespace. + */ +#ifdef __linux__ +static int +zfs_do_zone_impl(int argc, char **argv, boolean_t attach) +{ + zfs_handle_t *zhp; + int ret; + + if (argc < 3) { + (void) fprintf(stderr, gettext("missing argument(s)\n")); + usage(B_FALSE); + } + if (argc > 3) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) + return (1); + + ret = (zfs_userns(zhp, argv[1], attach) != 0); + + zfs_close(zhp); + return (ret); +} + +static int +zfs_do_zone(int argc, char **argv) +{ + return (zfs_do_zone_impl(argc, argv, B_TRUE)); +} + +static int +zfs_do_unzone(int argc, char **argv) +{ + return (zfs_do_zone_impl(argc, argv, B_FALSE)); +} +#endif + #ifdef __FreeBSD__ #include #include diff --git a/config/kernel-user-ns-inum.m4 b/config/kernel-user-ns-inum.m4 new file mode 100644 index 000000000000..2207a4aa6921 --- /dev/null +++ b/config/kernel-user-ns-inum.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # 3.18 API change +dnl # struct user_namespace inum moved from .proc_inum to .ns.inum. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [ + ZFS_LINUX_TEST_SRC([user_ns_common_inum], [ + #include + ], [ + struct user_namespace uns; + uns.ns.inum = 0; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [ + AC_MSG_CHECKING([whether user_namespace->ns.inum exists]) + ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1, + [user_namespace->ns.inum exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 9530367507d6..1f274cbe4f30 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC + ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM AC_MSG_CHECKING([for available kernel interfaces]) ZFS_LINUX_TEST_COMPILE_ALL([kabi]) @@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC + ZFS_AC_KERNEL_USER_NS_COMMON_INUM ]) dnl # diff --git a/contrib/pyzfs/libzfs_core/_constants.py b/contrib/pyzfs/libzfs_core/_constants.py index 98a501ef40e2..2db2bba8a824 100644 --- a/contrib/pyzfs/libzfs_core/_constants.py +++ b/contrib/pyzfs/libzfs_core/_constants.py @@ -100,6 +100,7 @@ def enum(*sequential, **named): 'ZFS_ERR_REBUILD_IN_PROGRESS', 'ZFS_ERR_BADPROP', 'ZFS_ERR_VDEV_NOTSUP', + 'ZFS_ERR_NOT_USER_NAMESPACE', ], {} ) diff --git a/include/libzfs.h b/include/libzfs.h index 2c2aa3faf14b..fe420de4d4de 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -150,6 +150,7 @@ typedef enum zfs_error { EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_REBUILDING, /* resilvering (sequential reconstrution) */ EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */ + EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */ EZFS_UNKNOWN } zfs_error_t; @@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, 
uint64_t, uint64_t, #endif /* __FreeBSD__ */ +#ifdef __linux__ + +/* + * Add or delete the given filesystem to/from the given user namespace. + */ +_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach); + +#endif + #ifdef __cplusplus } #endif diff --git a/include/os/linux/spl/sys/zone.h b/include/os/linux/spl/sys/zone.h index 00e30f690c38..5978a6285fa1 100644 --- a/include/os/linux/spl/sys/zone.h +++ b/include/os/linux/spl/sys/zone.h @@ -25,11 +25,34 @@ #define _SPL_ZONE_H #include +#include -#define GLOBAL_ZONEID 0 +#include +#include -#define zone_dataset_visible(x, y) (1) -#define crgetzoneid(x) (GLOBAL_ZONEID) -#define INGLOBALZONE(z) (1) +/* + * Attach the given dataset to the given user namespace. + */ +extern int zone_dataset_attach(cred_t *, const char *, int); + +/* + * Detach the given dataset from the given user namespace. + */ +extern int zone_dataset_detach(cred_t *, const char *, int); + +/* + * Returns true if the named pool/dataset is visible in the current zone. 
+ */ +extern int zone_dataset_visible(const char *dataset, int *write); + +int spl_zone_init(void); +void spl_zone_fini(void); + +extern unsigned int crgetzoneid(const cred_t *); +extern unsigned int global_zoneid(void); +extern boolean_t inglobalzone(proc_t *); + +#define INGLOBALZONE(x) inglobalzone(x) +#define GLOBAL_ZONEID global_zoneid() #endif /* SPL_ZONE_H */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 2f90af9999ad..73e4443988f7 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1471,7 +1471,9 @@ typedef enum zfs_ioc { ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */ ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */ ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */ + ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */ ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */ + ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */ ZFS_IOC_SET_BOOTENV, /* 0x87 */ ZFS_IOC_GET_BOOTENV, /* 0x88 */ ZFS_IOC_UNREGISTER_FS, /* 0x89 (Windows) */ @@ -1555,6 +1557,7 @@ typedef enum { ZFS_ERR_REBUILD_IN_PROGRESS, ZFS_ERR_BADPROP, ZFS_ERR_VDEV_NOTSUP, + ZFS_ERR_NOT_USER_NAMESPACE, } zfs_errno_t; /* diff --git a/lib/libspl/include/sys/types.h b/lib/libspl/include/sys/types.h index e3fb92884eab..a8631c1287ea 100644 --- a/lib/libspl/include/sys/types.h +++ b/lib/libspl/include/sys/types.h @@ -44,7 +44,7 @@ #include #endif /* HAVE_INTTYPES */ -typedef int zoneid_t; +typedef uint_t zoneid_t; typedef int projid_t; /* diff --git a/lib/libspl/include/zone.h b/lib/libspl/include/zone.h index b0ac2d9bc610..0af4e7a2fa49 100644 --- a/lib/libspl/include/zone.h +++ b/lib/libspl/include/zone.h @@ -33,7 +33,17 @@ extern "C" { #endif -#define GLOBAL_ZONEID 0 +#ifdef __FreeBSD__ +#define GLOBAL_ZONEID 0 +#else +/* + * Hardcoded in the kernel's root user namespace. A "better" way to get + * this would be by using ioctl_ns(2), but this would need to be performed + * recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only + * supported since Linux 4.9. 
+ */ +#define GLOBAL_ZONEID 4026531837U +#endif extern zoneid_t getzoneid(void); diff --git a/lib/libspl/os/linux/zone.c b/lib/libspl/os/linux/zone.c index 393a16ad5cdd..65c02dfe7aab 100644 --- a/lib/libspl/os/linux/zone.c +++ b/lib/libspl/os/linux/zone.c @@ -23,10 +23,42 @@ * Use is subject to license terms. */ +#include +#include +#include +#include +#include +#include + #include zoneid_t getzoneid(void) { - return (GLOBAL_ZONEID); + char path[PATH_MAX]; + char buf[128] = { '\0' }; + char *cp; + + int c = snprintf(path, sizeof (path), "/proc/self/ns/user"); + /* This API doesn't have any error checking... */ + if (c < 0) + return (0); + + ssize_t r = readlink(path, buf, sizeof (buf) - 1); + if (r < 0) + return (0); + + cp = strchr(buf, '['); + if (cp == NULL) + return (0); + cp++; + + /* Clear errno so a stale ERANGE is not mistaken for overflow. */ + errno = 0; + unsigned long n = strtoul(cp, NULL, 10); + if (n == ULONG_MAX && errno == ERANGE) + return (0); + zoneid_t z = (zoneid_t)n; + + return (z); } diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi index 86220b44b229..766d8843000d 100644 --- a/lib/libuutil/libuutil.abi +++ b/lib/libuutil/libuutil.abi @@ -1081,7 +1081,7 @@ - + diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 9f9a2f9071d9..fb5e01b82c40 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -433,6 +433,7 @@ + @@ -1537,7 +1538,7 @@ - + @@ -4414,6 +4415,12 @@ + + + + + + diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 3f923031d4b0..284145024503 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -298,6 +298,9 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_VDEV_NOTSUP: return (dgettext(TEXT_DOMAIN, "operation not supported " "on this type of vdev")); + case EZFS_NOT_USER_NAMESPACE: + return (dgettext(TEXT_DOMAIN, "the provided file " + "was not a user namespace file")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -484,6 +487,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const 
char *fmt, ...) case ZFS_ERR_BADPROP: zfs_verror(hdl, EZFS_BADPROP, fmt, ap); break; + case ZFS_ERR_NOT_USER_NAMESPACE: + zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap); + break; default: zfs_error_aux(hdl, "%s", strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); diff --git a/lib/libzfs/os/linux/libzfs_util_os.c b/lib/libzfs/os/linux/libzfs_util_os.c index 9d6f574a5546..7bd26ea98131 100644 --- a/lib/libzfs/os/linux/libzfs_util_os.c +++ b/lib/libzfs/os/linux/libzfs_util_os.c @@ -19,6 +19,9 @@ * CDDL HEADER END */ +/* + * Copyright (c) 2021 Klara, Inc. + */ #include #include @@ -207,3 +210,72 @@ zfs_version_kernel(void) ret[read - 1] = '\0'; return (ret); } + +/* + * Add or delete the given filesystem to/from the given user namespace. + */ +int +zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_cmd_t zc = {"\0"}; + char errbuf[1024]; + unsigned long cmd; + int ret; + + if (attach) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"), + zhp->zfs_name); + } else { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"), + zhp->zfs_name); + } + + switch (zhp->zfs_type) { + case ZFS_TYPE_VOLUME: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "volumes can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_SNAPSHOT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_BOOKMARK: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "bookmarks can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_VDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "vdevs can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_INVALID: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid zfs_type_t: ZFS_TYPE_INVALID")); + return 
(zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_POOL: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pools can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_FILESYSTEM: + zfs_fallthrough; + } + assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_objset_type = DMU_OST_ZFS; + zc.zc_cleanup_fd = open(nspath, O_RDONLY); + if (zc.zc_cleanup_fd < 0) { + return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf)); + } + + cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH; + if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0) + zfs_standard_error(hdl, errno, errbuf); + + /* The namespace fd is only needed for the duration of the ioctl. */ + (void) close(zc.zc_cleanup_fd); + + return (ret); +} diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 266007e4dcad..fae98469a04f 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -939,7 +939,7 @@ - + diff --git a/man/Makefile.am b/man/Makefile.am index 8fa21d2fd23e..12f818372f37 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -59,9 +59,11 @@ dist_man_MANS = \ %D%/man8/zfs-unjail.8 \ %D%/man8/zfs-unload-key.8 \ %D%/man8/zfs-unmount.8 \ + %D%/man8/zfs-unzone.8 \ %D%/man8/zfs-upgrade.8 \ %D%/man8/zfs-userspace.8 \ %D%/man8/zfs-wait.8 \ + %D%/man8/zfs-zone.8 \ %D%/man8/zfs_ids_to_path.8 \ %D%/man8/zgenhostid.8 \ %D%/man8/zinject.8 \ diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index b1e1ce377fe2..4d6fc613c851 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1885,8 +1885,7 @@ feature and are not relevant on other platforms. The default value is .Sy off . .It Sy zoned Ns = Ns Sy on Ns | Ns Sy off -Controls whether the dataset is managed from a non-global zone. -Zones are a Solaris feature and are not relevant on other platforms. +Controls whether the dataset is managed from a non-global zone or namespace. The default value is .Sy off . 
.El diff --git a/man/man8/zfs-unzone.8 b/man/man8/zfs-unzone.8 new file mode 120000 index 000000000000..9052b28aa880 --- /dev/null +++ b/man/man8/zfs-unzone.8 @@ -0,0 +1 @@ +zfs-zone.8 \ No newline at end of file diff --git a/man/man8/zfs-zone.8 b/man/man8/zfs-zone.8 new file mode 100644 index 000000000000..2f975dde6799 --- /dev/null +++ b/man/man8/zfs-zone.8 @@ -0,0 +1,116 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2011 Joshua M. Clulow +.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved. +.\" Copyright (c) 2011, Pawel Jakub Dawidek +.\" Copyright (c) 2012, Glen Barber +.\" Copyright (c) 2012, Bryan Drewery +.\" Copyright (c) 2013, Steven Hartland +.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. +.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. +.\" Copyright (c) 2014 by Adam Stevko. All rights reserved. +.\" Copyright (c) 2014 Integros [integros.com] +.\" Copyright (c) 2014, Xin LI +.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved. +.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved. 
+.\" Copyright 2019 Richard Laager. All rights reserved. +.\" Copyright 2018 Nexenta Systems, Inc. +.\" Copyright 2019 Joyent, Inc. +.\" Copyright 2021 Klara, Inc. +.\" +.Dd June 3, 2022 +.Dt ZFS-ZONE 8 +.Os +. +.Sh NAME +.Nm zfs-zone , +.Nm zfs-unzone +.Nd attach and detach ZFS filesystems to user namespaces +.Sh SYNOPSIS +.Nm zfs Cm zone +.Ar nsfile +.Ar filesystem +.Nm zfs Cm unzone +.Ar nsfile +.Ar filesystem +. +.Sh DESCRIPTION +.Bl -tag -width "" +.It Xo +.Nm zfs +.Cm zone +.Ar nsfile +.Ar filesystem +.Xc +Attach the specified +.Ar filesystem +to the user namespace identified by +.Ar nsfile . +From now on this file system tree can be managed from within a user namespace +if the +.Sy zoned +property has been set. +.Pp +You cannot attach a zoned dataset's children to another user namespace. +You can also not attach the root file system +of the user namespace or any dataset +which needs to be mounted before the zfs service +is run inside the user namespace, +as it would be attached unmounted until it is +mounted from the service inside the user namespace. +.Pp +To allow management of the dataset from within a user namespace, the +.Sy zoned +property has to be set and the user namespace needs access to the +.Pa /dev/zfs +device. +The +.Sy quota +property cannot be changed from within a user namespace. +.Pp +After a dataset is attached to a user namespace and the +.Sy zoned +property is set, +a zoned file system cannot be mounted outside the user namespace, +since the user namespace administrator might have set the mount point +to an unacceptable value. +.It Xo +.Nm zfs +.Cm unzone +.Ar nsfile +.Ar filesystem +.Xc +Detach the specified +.Ar filesystem +from the user namespace identified by +.Ar nsfile . +.El +.Sh EXAMPLES +.Ss Example 1 : No Delegating a Dataset to a User Namespace +The following example delegates the +.Ar tank/users +dataset to a user namespace identified by user namespace file +.Pa /proc/1234/ns/user . 
+.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users +. +.Sh SEE ALSO +.Xr zfsprops 7 diff --git a/module/Kbuild.in b/module/Kbuild.in index ed8dc23a90d3..14f2362810aa 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -65,7 +65,8 @@ SPL_OBJS := \ spl-tsd.o \ spl-vmem.o \ spl-xdr.o \ - spl-zlib.o + spl-zlib.o \ + spl-zone.o spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS)) diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index f99a2f966660..5179100d1665 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -780,8 +780,13 @@ spl_init(void) if ((rc = spl_zlib_init())) goto out7; + if ((rc = spl_zone_init())) + goto out8; + return (rc); +out8: + spl_zlib_fini(); out7: spl_kstat_fini(); out6: @@ -801,6 +806,7 @@ spl_init(void) static void __exit spl_fini(void) { + spl_zone_fini(); spl_zlib_fini(); spl_kstat_fini(); spl_proc_fini(); diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c new file mode 100644 index 000000000000..804c8010ccef --- /dev/null +++ b/module/os/linux/spl/spl-zone.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2021 Klara Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_USER_NS) +#include +#include +#endif + +static kmutex_t zone_datasets_lock; +static struct list_head zone_datasets; + +typedef struct zone_datasets { + struct list_head zds_list; /* zone_datasets linkage */ + struct user_namespace *zds_userns; /* namespace reference */ + struct list_head zds_datasets; /* datasets for the namespace */ +} zone_datasets_t; + +typedef struct zone_dataset { + struct list_head zd_list; /* zone_dataset linkage */ + size_t zd_dsnamelen; /* length of zd_dsname, sans NUL */ + char zd_dsname[0]; /* name of the member dataset */ +} zone_dataset_t; + +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +/* + * Returns: + * - 0 on success + * - EBADF if the provided file descriptor is not an open file + * - ENOTTY if the file itself is not a user namespace file. We want to + * intercept this error in the ZFS layer. We cannot just return one of the + * ZFS_ERR_* errors here as we want to preserve the separation of the ZFS + * and the SPL layers. 
+ */ +static int +user_ns_get(int fd, struct user_namespace **userns) +{ + struct kstatfs st; + struct file *nsfile; + struct ns_common *ns; + int error; + + if ((nsfile = fget(fd)) == NULL) + return (EBADF); + if (vfs_statfs(&nsfile->f_path, &st) != 0) { + error = ENOTTY; + goto done; + } + if (st.f_type != NSFS_MAGIC) { + error = ENOTTY; + goto done; + } + ns = get_proc_ns(file_inode(nsfile)); + if (ns->ops->type != CLONE_NEWUSER) { + error = ENOTTY; + goto done; + } + *userns = container_of(ns, struct user_namespace, ns); + + error = 0; +done: + fput(nsfile); + + return (error); +} +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ + +static unsigned int +user_ns_zoneid(struct user_namespace *user_ns) +{ + unsigned int r; + +#if defined(HAVE_USER_NS_COMMON_INUM) + r = user_ns->ns.inum; +#else + r = user_ns->proc_inum; +#endif + + return (r); +} + +static struct zone_datasets * +zone_datasets_lookup(unsigned int nsinum) +{ + zone_datasets_t *zds; + + list_for_each_entry(zds, &zone_datasets, zds_list) { + if (user_ns_zoneid(zds->zds_userns) == nsinum) + return (zds); + } + return (NULL); +} + +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +static struct zone_dataset * +zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) +{ + zone_dataset_t *zd; + + list_for_each_entry(zd, &zds->zds_datasets, zd_list) { + if (zd->zd_dsnamelen != dsnamelen) + continue; + if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) + return (zd); + } + + return (NULL); +} + +static int +zone_dataset_cred_check(cred_t *cred) +{ + + if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) + return (EPERM); + + return (0); +} +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ + +static int +zone_dataset_name_check(const char *dataset, size_t *dsnamelen) +{ + + if (dataset[0] == '\0' || dataset[0] == '/') + return (ENOENT); + + *dsnamelen = strlen(dataset); + /* Ignore trailing slash, if supplied. 
*/ + if (dataset[*dsnamelen - 1] == '/') + (*dsnamelen)--; + + return (0); +} + +int +zone_dataset_attach(cred_t *cred, const char *dataset, int cleanup_fd) +{ +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) + struct user_namespace *userns; + zone_datasets_t *zds; + zone_dataset_t *zd; + int error; + size_t dsnamelen; + + if ((error = zone_dataset_cred_check(cred)) != 0) + return (error); + if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) + return (error); + if ((error = user_ns_get(cleanup_fd, &userns)) != 0) + return (error); + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(user_ns_zoneid(userns)); + if (zds == NULL) { + zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); + INIT_LIST_HEAD(&zds->zds_list); + INIT_LIST_HEAD(&zds->zds_datasets); + zds->zds_userns = userns; + /* + * Lock the namespace by increasing its refcount to prevent + * the namespace ID from being reused. + */ + get_user_ns(userns); + list_add_tail(&zds->zds_list, &zone_datasets); + } else { + zd = zone_dataset_lookup(zds, dataset, dsnamelen); + if (zd != NULL) { + mutex_exit(&zone_datasets_lock); + return (EEXIST); + } + } + + zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); + zd->zd_dsnamelen = dsnamelen; + strncpy(zd->zd_dsname, dataset, dsnamelen); + zd->zd_dsname[dsnamelen] = '\0'; + INIT_LIST_HEAD(&zd->zd_list); + list_add_tail(&zd->zd_list, &zds->zds_datasets); + + mutex_exit(&zone_datasets_lock); + return (0); +#else + return (ENXIO); +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +} +EXPORT_SYMBOL(zone_dataset_attach); + +int +zone_dataset_detach(cred_t *cred, const char *dataset, int cleanup_fd) +{ +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) + struct user_namespace *userns; + zone_datasets_t *zds; + zone_dataset_t *zd; + int error; + size_t dsnamelen; + + if ((error = zone_dataset_cred_check(cred)) != 0) + return (error); + if ((error = 
zone_dataset_name_check(dataset, &dsnamelen)) != 0) + return (error); + if ((error = user_ns_get(cleanup_fd, &userns)) != 0) + return (error); + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(user_ns_zoneid(userns)); + if (zds != NULL) + zd = zone_dataset_lookup(zds, dataset, dsnamelen); + if (zds == NULL || zd == NULL) { + mutex_exit(&zone_datasets_lock); + return (ENOENT); + } + + list_del(&zd->zd_list); + kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); + + /* Prune the namespace entry if it has no more delegations. */ + if (list_empty(&zds->zds_datasets)) { + /* + * Decrease the refcount now that the namespace is no longer + * used. It is no longer necessary to prevent the namespace ID + * from being reused. + */ + put_user_ns(userns); + list_del(&zds->zds_list); + kmem_free(zds, sizeof (*zds)); + } + + mutex_exit(&zone_datasets_lock); + return (0); +#else + return (ENXIO); +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +} +EXPORT_SYMBOL(zone_dataset_detach); + +/* + * A dataset is visible if: + * - It is a parent of a namespace entry. + * - It is one of the namespace entries. + * - It is a child of a namespace entry. + * + * A dataset is writable if: + * - It is one of the namespace entries. + * - It is a child of a namespace entry. + * + * The parent datasets of namespace entries are visible and + * read-only to provide a path back to the root of the pool. + */ +int +zone_dataset_visible(const char *dataset, int *write) +{ + zone_datasets_t *zds; + zone_dataset_t *zd; + size_t dsnamelen, zd_len; + int visible; + + /* Default to read-only, in case visible is returned. 
*/ + if (write != NULL) + *write = 0; + if (zone_dataset_name_check(dataset, &dsnamelen) != 0) + return (0); + if (INGLOBALZONE(curproc)) { + if (write != NULL) + *write = 1; + return (1); + } + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); + if (zds == NULL) { + mutex_exit(&zone_datasets_lock); + return (0); + } + + visible = 0; + list_for_each_entry(zd, &zds->zds_datasets, zd_list) { + zd_len = strlen(zd->zd_dsname); + if (zd_len > dsnamelen) { + /* + * The name of the namespace entry is longer than that + * of the dataset, so it could be that the dataset is a + * parent of the namespace entry. + */ + visible = memcmp(zd->zd_dsname, dataset, + dsnamelen) == 0 && + zd->zd_dsname[dsnamelen] == '/'; + if (visible) + break; + } else if (zd_len == dsnamelen) { + /* + * The name of the namespace entry is as long as that + * of the dataset, so perhaps the dataset itself is the + * namespace entry. + */ + visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; + if (visible) { + if (write != NULL) + *write = 1; + break; + } + } else { + /* + * The name of the namespace entry is shorter than that + * of the dataset, so perhaps the dataset is a child of + * the namespace entry. 
+ */ + visible = memcmp(zd->zd_dsname, dataset, + zd_len) == 0 && dataset[zd_len] == '/'; + if (visible) { + if (write != NULL) + *write = 1; + break; + } + } + } + + mutex_exit(&zone_datasets_lock); + return (visible); +} +EXPORT_SYMBOL(zone_dataset_visible); + +unsigned int +global_zoneid(void) +{ + unsigned int z = 0; + +#if defined(CONFIG_USER_NS) + z = user_ns_zoneid(&init_user_ns); +#endif + + return (z); +} +EXPORT_SYMBOL(global_zoneid); + +unsigned int +crgetzoneid(const cred_t *cr) +{ + unsigned int r = 0; + +#if defined(CONFIG_USER_NS) + r = user_ns_zoneid(cr->user_ns); +#endif + + return (r); +} +EXPORT_SYMBOL(crgetzoneid); + +boolean_t +inglobalzone(proc_t *proc) +{ +#if defined(CONFIG_USER_NS) + return (proc->cred->user_ns == &init_user_ns); +#else + return (B_TRUE); +#endif +} +EXPORT_SYMBOL(inglobalzone); + +int +spl_zone_init(void) +{ + mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); + INIT_LIST_HEAD(&zone_datasets); + return (0); +} + +void +spl_zone_fini(void) +{ + zone_datasets_t *zds; + zone_dataset_t *zd; + + /* + * It would be better to assert an empty zone_datasets, but since + * there's no automatic mechanism for cleaning them up if the user + * namespace is destroyed, just do it here, since spl is about to go + * out of context. 
+ */ + while (!list_empty(&zone_datasets)) { + zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); + while (!list_empty(&zds->zds_datasets)) { + zd = list_entry(zds->zds_datasets.next, + zone_dataset_t, zd_list); + list_del(&zd->zd_list); + kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); + } + put_user_ns(zds->zds_userns); /* one ref per namespace */ + list_del(&zds->zds_list); + kmem_free(zds, sizeof (*zds)); + } + mutex_destroy(&zone_datasets_lock); +} diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 5a52092bb90a..ab00d2ae14d2 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err, static int priv_policy(const cred_t *cr, int capability, int err) { - return (priv_policy_ns(cr, capability, err, NULL)); + return (priv_policy_ns(cr, capability, err, cr->user_ns)); } static int diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c index c65702e1a053..67b864aa77a9 100644 --- a/module/os/linux/zfs/zfs_ioctl_os.c +++ b/module/os/linux/zfs/zfs_ioctl_os.c @@ -37,6 +37,7 @@ * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2021 Klara, Inc. */ #include @@ -150,6 +151,48 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) } +static int +zfs_ioc_userns_attach(zfs_cmd_t *zc) +{ + int error; + + if (zc == NULL) + return (SET_ERROR(EINVAL)); + + error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd); + + /* + * Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived + * back from the SPL layer, which does not know about ZFS_ERR_* errors. + * See the comment at the user_ns_get() function in spl-zone.c for + * details. 
+ */ + if (error == ENOTTY) + error = ZFS_ERR_NOT_USER_NAMESPACE; + + return (error); +} + +static int +zfs_ioc_userns_detach(zfs_cmd_t *zc) +{ + int error; + + if (zc == NULL) + return (SET_ERROR(EINVAL)); + + error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd); + + /* + * See the comment in zfs_ioc_userns_attach() for details on what is + * going on here. + */ + if (error == ENOTTY) + error = ZFS_ERR_NOT_USER_NAMESPACE; + + return (error); +} + uint64_t zfs_max_nvlist_src_size_os(void) { @@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname) void zfs_ioctl_init_os(void) { + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH, + zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH, + zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE); } #ifdef CONFIG_COMPAT diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 81a059651e8a..a67ba821d06f 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) int error = 0; zfsvfs_t *zfsvfs = NULL; vfs_t *vfs = NULL; + int canwrite; + int dataset_visible_zone; ASSERT(zm); ASSERT(osname); + dataset_visible_zone = zone_dataset_visible(osname, &canwrite); + + /* + * Refuse to mount a filesystem if we are in a namespace and the + * dataset is not visible or writable in that namespace. + */ + if (!INGLOBALZONE(curproc) && + (!dataset_visible_zone || !canwrite)) { + return (SET_ERROR(EPERM)); + } + error = zfsvfs_parse_options(zm->mnt_data, &vfs); if (error) return (error); + /* + * If a non-writable filesystem is being mounted without the + * read-only flag, pretend it was set, as done for snapshots. 
+ */ + if (!canwrite) + vfs->vfs_readonly = true; + error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs); if (error) { zfsvfs_vfs_free(vfs); diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index c2fd3fee1401..b18efde9b18a 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = { struct file_system_type zpl_fs_type = { .owner = THIS_MODULE, .name = ZFS_DRIVER, + .fs_flags = FS_USERNS_MOUNT, .mount = zpl_mount, .kill_sb = zpl_kill_sb, }; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index fa71f412ba6c..9b32e73afb1e 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -177,7 +177,8 @@ tests = ['upgrade_projectquota_001_pos'] tags = ['functional', 'upgrade'] [tests/functional/user_namespace:Linux] -tests = ['user_namespace_001'] +tests = ['user_namespace_001', 'user_namespace_002', 'user_namespace_003', + 'user_namespace_004'] tags = ['functional', 'user_namespace'] [tests/functional/userquota:Linux] diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 1ee786d131d7..47357dca57fb 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -146,11 +146,13 @@ export SYSTEM_FILES_LINUX='attr mkswap modprobe mpstat + nsenter parted perf setfattr sha256sum udevadm + unshare useradd userdel usermod diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 5b0ff5eee4ea..f901a405a960 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1895,6 +1895,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/user_namespace/cleanup.ksh \ functional/user_namespace/setup.ksh \ functional/user_namespace/user_namespace_001.ksh \ + functional/user_namespace/user_namespace_002.ksh \ + functional/user_namespace/user_namespace_003.ksh \ + 
functional/user_namespace/user_namespace_004.ksh \ functional/userquota/cleanup.ksh \ functional/userquota/groupspace_001_pos.ksh \ functional/userquota/groupspace_002_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh index 3d19c4273e24..39aad91d0c61 100755 --- a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh @@ -47,6 +47,11 @@ function cleanup done } +unshare -Urm echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi + log_onexit cleanup log_assert "Check root in user namespaces" diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh new file mode 100755 index 000000000000..a5f76014ab85 --- /dev/null +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh @@ -0,0 +1,115 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. 
$STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib + +# +# DESCRIPTION: +# Regression test for delegation of datasets to user namespaces. +# +# STRATEGY: +# 1. Delegate a dataset to a user namespace. +# 2. Check that 'zfs list' is only able to see inside the delegation. +# 3. Check that 'zfs create' is able to create only inside the delegation. +# 4. Check that the filesystems can be mounted inside the delegation, +# and that file permissions are appropriate. +# 5. Check that 'zfs destroy' is able to destroy only inside the delegation. +# 6. Check that 'zfs unzone' has a desirable effect. +# + +verify_runnable "both" + +user_ns_cleanup() { + if [ -n "$proc_ns_added" ]; then + log_must zfs unzone $proc_ns_added $TESTPOOL/userns + fi + if [ -n "$unshared_pid" ]; then + kill -9 $unshared_pid + # Give it a sec to make the global cleanup more reliable. + sleep 1 + fi + log_must zfs destroy -r $TESTPOOL/userns +} + +log_onexit user_ns_cleanup + +log_assert "Check zfs/zpool command delegation in user namespaces" + +# Create the baseline datasets. +log_must zfs create -o zoned=on $TESTPOOL/userns +log_must zfs create -o zoned=on $TESTPOOL/userns/testds +# Partial match should be denied; hence we also set this to be 'zoned'. +log_must zfs create -o zoned=on $TESTPOOL/user + +# 1. Create a user namespace with a cloned mount namespace, then delegate. +unshare -Urm echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +unshare -Urm /usr/bin/sleep 1h & +unshared_pid=$! +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +proc_ns=/proc/$unshared_pid/ns/user +sleep 2 # Wait for unshare to acquire user namespace +log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}" + +NSENTER="nsenter -t $unshared_pid --all" + +$NSENTER echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to enter user namespace" +fi + +# 1b. Pre-test by checking that 'zone' does something new. 
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')" +log_must test -z "$list" +log_must zfs zone $proc_ns $TESTPOOL/userns +proc_ns_added="$proc_ns" # read by user_ns_cleanup + +# 2. 'zfs list' +list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')" +log_must test "$list" = "$TESTPOOL $TESTPOOL/userns $TESTPOOL/userns/testds " + +# 3. 'zfs create' +log_must $NSENTER zfs create $TESTPOOL/userns/created +log_mustnot $NSENTER zfs create $TESTPOOL/user/created + +# 4. Check file permissions (create mounts the filesystem). The 'permissions' +# check is simply, does it get mapped to user namespace's root/root? +log_must $NSENTER df -h /$TESTPOOL/userns/created +log_must $NSENTER mkfile 8192 /$TESTPOOL/userns/created/testfile +uidgid=$($NSENTER stat -c '%u %g' /$TESTPOOL/userns/created/testfile) +log_must test "${uidgid}" = "0 0" + +# 5. 'zfs destroy' +log_must $NSENTER zfs destroy $TESTPOOL/userns/created +log_mustnot $NSENTER zfs destroy $TESTPOOL/user + +# 6. 'zfs unzone' should have an effect +log_must zfs unzone $proc_ns $TESTPOOL/userns +proc_ns_added="" +list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')" +log_must test -z "$list" + +log_pass "Check zfs/zpool command delegation in user namespaces" diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh new file mode 100755 index 000000000000..20a7f6677d20 --- /dev/null +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh @@ -0,0 +1,97 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib + +# +# DESCRIPTION: +# Regression test for delegation of datasets to user namespaces. +# +# STRATEGY: +# 1. Delegate two datasets with distinctive names to a user namespace. +# 2. Check that 'zfs list' is not able to see datasets outside of the +# delegation, which have a prefix matching one of the delegated sets. +# Also, check that all the delegated sets are visible. +# + +verify_runnable "both" + +user_ns_cleanup() { + if [ -n "$proc_ns_added" ]; then + log_must zfs unzone $proc_ns_added $TESTPOOL/userns + log_must zfs unzone $proc_ns_added $TESTPOOL/otheruserns + fi + if [ -n "$unshared_pid" ]; then + kill -9 $unshared_pid + # Give it a sec to make the global cleanup more reliable. + sleep 1 + fi + log_must zfs destroy -r $TESTPOOL/userns + log_must zfs destroy -r $TESTPOOL/usernsisitnot + log_must zfs destroy -r $TESTPOOL/otheruserns +} + +log_onexit user_ns_cleanup + +log_assert "Check zfs list command handling of dataset visibility in user namespaces" + +# Create the baseline dataset. +log_must zfs create -o zoned=on $TESTPOOL/userns +# Datasets with a prefix matching the delegated dataset should not be +# automatically considered visible. +log_must zfs create -o zoned=on $TESTPOOL/usernsisitnot +# All delegated datasets should be visible. +log_must zfs create -o zoned=on $TESTPOOL/otheruserns + +# 1. Create a user namespace with a cloned mount namespace, then delegate. +unshare -Urm echo test +if [ "$?" 
-ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +unshare -Urm /usr/bin/sleep 1h & +unshared_pid=$! +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +proc_ns=/proc/$unshared_pid/ns/user +sleep 2 # Wait for unshare to acquire user namespace +log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}" + +NSENTER="nsenter -t $unshared_pid --all" + +$NSENTER echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to enter user namespace" +fi + +# 1b. Pre-test by checking that 'zone' does something new. +list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')" +log_must test -z "$list" +log_must zfs zone $proc_ns $TESTPOOL/userns +log_must zfs zone $proc_ns $TESTPOOL/otheruserns +proc_ns_added="$proc_ns" # read by user_ns_cleanup + +# 2. 'zfs list' +list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')" +log_must test "$list" = "$TESTPOOL $TESTPOOL/otheruserns $TESTPOOL/userns " + +log_pass "Check zfs list command handling of dataset visibility in user namespaces" diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh new file mode 100755 index 000000000000..6edb0413c98a --- /dev/null +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh @@ -0,0 +1,67 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib + +# +# DESCRIPTION: +# Regression test for safeguards around the delegation of datasets to +# user namespaces. +# +# STRATEGY: +# 1. Check that 'zfs zone' correctly handles the case of the first +# argument being a non-namespace file. +# 2. Check that 'zfs zone' correctly handles the case of the first +# argument being a non-namespace and non-existent file. +# + +verify_runnable "both" + +user_ns_cleanup() { + if [ -n "$temp_file" ]; then + log_must rm -f "$temp_file" + fi + + log_must zfs destroy -r "$TESTPOOL/userns" +} + +# Bail out before registering cleanup if user namespaces are unsupported. +unshare -Urm echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi + +log_onexit user_ns_cleanup + +log_assert "Check zfs zone command handling of non-namespace files" + +# Create the baseline dataset. +log_must zfs create -o zoned=on "$TESTPOOL/userns" + +# 1. Try to pass a non-namespace file to zfs zone. +temp_file="$(TMPDIR=$TEST_BASE_DIR mktemp)" +log_mustnot zfs zone "$temp_file" "$TESTPOOL/userns" + +# 2. Try to pass a non-namespace and non-existent file to zfs zone. +log_mustnot zfs zone "$TEST_BASE_DIR/nonexistent" "$TESTPOOL/userns" + +log_pass "Check zfs zone command handling of non-namespace files"