From a2f1945ee3ce8eba02ef54168c72f3be8f2cebdc Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 22 Nov 2013 11:20:41 -0800 Subject: [PATCH 01/11] Add a unique "eid" value to all zevents Tagging each zevent with a unique monotonically increasing EID (Event IDentifier) provides the required infrastructure for a user space daemon to reliably process zevents. By writing the EID to persistent storage the daemon can safely resume where it left off in the event stream when it's restarted. Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Issue #2 --- include/sys/fm/protocol.h | 1 + include/sys/fm/util.h | 1 + module/zfs/fm.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) diff --git a/include/sys/fm/protocol.h b/include/sys/fm/protocol.h index 1ee221286cef..de05bb296741 100644 --- a/include/sys/fm/protocol.h +++ b/include/sys/fm/protocol.h @@ -70,6 +70,7 @@ extern "C" { #define FM_EREPORT_DETECTOR "detector" #define FM_EREPORT_ENA "ena" #define FM_EREPORT_TIME "time" +#define FM_EREPORT_EID "eid" /* list.* event payload member names */ #define FM_LIST_EVENT_SIZE "list-sz" diff --git a/include/sys/fm/util.h b/include/sys/fm/util.h index 2f03d1011a22..9dfd436c1b43 100644 --- a/include/sys/fm/util.h +++ b/include/sys/fm/util.h @@ -81,6 +81,7 @@ typedef struct zevent_s { list_t ev_ze_list; /* " */ list_node_t ev_node; /* " */ zevent_cb_t *ev_cb; /* " */ + uint64_t ev_eid; } zevent_t; typedef struct zfs_zevent { diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 002827b520cf..fe9223ff8186 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -84,6 +84,14 @@ static int zevent_len_cur = 0; static int zevent_waiters = 0; static int zevent_flags = 0; +/* + * The EID (Event IDentifier) is used to uniquely tag a zevent when it is + * posted. The posted EIDs are monotonically increasing but not persistent. + * They will be reset to the initial value (1) each time the kernel module is + * loaded. 
+ */ +static uint64_t zevent_eid = 0; + static kmutex_t zevent_lock; static list_t zevent_list; static kcondvar_t zevent_cv; @@ -498,6 +506,7 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) { int64_t tv_array[2]; timestruc_t tv; + uint64_t eid; size_t nvl_size = 0; zevent_t *ev; @@ -509,6 +518,12 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) return; } + eid = atomic_inc_64_nv(&zevent_eid); + if (nvlist_add_uint64(nvl, FM_EREPORT_EID, eid)) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } + (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); @@ -527,6 +542,7 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) ev->ev_nvl = nvl; ev->ev_detector = detector; ev->ev_cb = cb; + ev->ev_eid = eid; mutex_enter(&zevent_lock); zfs_zevent_insert(ev); From 75e3ff58feda2d836c4546c5dc2e98fcd2d1a67b Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 22 Nov 2013 14:52:16 -0800 Subject: [PATCH 02/11] Add zpool_events_seek() functionality The ZFS_IOC_EVENTS_SEEK ioctl was added to allow user space callers to seek around the zevent file descriptor by EID. When a specific EID is passed and it exists the cursor will be positioned there. If the EID is no longer cached by the kernel ENOENT is returned. The caller may also pass ZEVENT_SEEK_START or ZEVENT_SEEK_END to seek to those respective locations. 
Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Issue #2 --- include/libzfs.h | 1 + include/sys/fm/util.h | 3 +- include/sys/fs/zfs.h | 1 + include/sys/zfs_ioctl.h | 3 ++ lib/libzfs/libzfs_pool.c | 36 ++++++++++++++++++++++++ module/zfs/fm.c | 61 ++++++++++++++++++++++++++++++++++++++++ module/zfs/zfs_ioctl.c | 24 ++++++++++++++++ 7 files changed, 128 insertions(+), 1 deletion(-) diff --git a/include/libzfs.h b/include/libzfs.h index 55dd34c99de1..cb78f1d62931 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -410,6 +410,7 @@ extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int); extern int zpool_events_clear(libzfs_handle_t *, int *); +extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); diff --git a/include/sys/fm/util.h b/include/sys/fm/util.h index 9dfd436c1b43..18fe49073239 100644 --- a/include/sys/fm/util.h +++ b/include/sys/fm/util.h @@ -71,7 +71,7 @@ typedef struct erpt_dump { #ifdef _KERNEL -#define ZEVENT_SHUTDOWN 0x1 +#define ZEVENT_SHUTDOWN 0x1 typedef void zevent_cb_t(nvlist_t *, nvlist_t *); @@ -99,6 +99,7 @@ extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **); extern void zfs_zevent_fd_rele(int); extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *); extern int zfs_zevent_wait(zfs_zevent_t *); +extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t); extern void zfs_zevent_init(zfs_zevent_t **); extern void zfs_zevent_destroy(zfs_zevent_t *); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 882e9e84ab85..ae72f834de52 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -864,6 +864,7 @@ typedef enum zfs_ioc { ZFS_IOC_LINUX = ('Z' << 8) + 0x80, ZFS_IOC_EVENTS_NEXT, ZFS_IOC_EVENTS_CLEAR, + 
ZFS_IOC_EVENTS_SEEK, /* * FreeBSD - 1/64 numbers reserved. diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index dad611328cbc..c63b16c78d32 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -262,6 +262,9 @@ typedef struct zinject_record { #define ZEVENT_NONBLOCK 0x1 #define ZEVENT_SIZE 1024 +#define ZEVENT_SEEK_START 0 +#define ZEVENT_SEEK_END UINT64_MAX + typedef enum zinject_type { ZINJECT_UNINITIALIZED, ZINJECT_DATA_FAULT, diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index b822ace688c9..1b8f3b63a82a 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3870,6 +3870,42 @@ zpool_events_clear(libzfs_handle_t *hdl, int *count) return (0); } +/* + * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for + * the passed zevent_fd file handle. On success zero is returned, + * otherwise -1 is returned and hdl->libzfs_error is set to the errno. + */ +int +zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd) +{ + zfs_cmd_t zc = {"\0"}; + int error = 0; + + zc.zc_guid = eid; + zc.zc_cleanup_fd = zevent_fd; + + if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) { + switch (errno) { + case ENOENT: + error = zfs_error_fmt(hdl, EZFS_NOENT, + dgettext(TEXT_DOMAIN, "cannot get event")); + break; + + case ENOMEM: + error = zfs_error_fmt(hdl, EZFS_NOMEM, + dgettext(TEXT_DOMAIN, "cannot get event")); + break; + + default: + error = zpool_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot get event")); + break; + } + } + + return (error); +} + void zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) diff --git a/module/zfs/fm.c b/module/zfs/fm.c index fe9223ff8186..246b3d2cf606 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -664,6 +664,67 @@ zfs_zevent_wait(zfs_zevent_t *ze) return (error); } +/* + * The caller may seek to a specific EID by passing that EID. 
If the EID + * is still available in the posted list of events the cursor is positioned + * there. Otherwise ENOENT is returned and the cursor is not moved. + * + * There are two reserved EIDs which may be passed and will never fail. + * ZEVENT_SEEK_START positions the cursor at the start of the list, and + * ZEVENT_SEEK_END positions the cursor at the end of the list. + */ +int +zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid) +{ + zevent_t *ev; + int error = 0; + + mutex_enter(&zevent_lock); + + if (eid == ZEVENT_SEEK_START) { + if (ze->ze_zevent) + list_remove(&ze->ze_zevent->ev_ze_list, ze); + + ze->ze_zevent = NULL; + goto out; + } + + if (eid == ZEVENT_SEEK_END) { + if (ze->ze_zevent) + list_remove(&ze->ze_zevent->ev_ze_list, ze); + + ev = list_head(&zevent_list); + if (ev) { + ze->ze_zevent = ev; + list_insert_head(&ev->ev_ze_list, ze); + } else { + ze->ze_zevent = NULL; + } + + goto out; + } + + for (ev = list_tail(&zevent_list); ev != NULL; + ev = list_prev(&zevent_list, ev)) { + if (ev->ev_eid == eid) { + if (ze->ze_zevent) + list_remove(&ze->ze_zevent->ev_ze_list, ze); + + ze->ze_zevent = ev; + list_insert_head(&ev->ev_ze_list, ze); + break; + } + } + + if (ev == NULL) + error = ENOENT; + +out: + mutex_exit(&zevent_lock); + + return (error); +} + void zfs_zevent_init(zfs_zevent_t **zep) { diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 5951bc673cb3..cd47790166f5 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -4919,6 +4919,28 @@ zfs_ioc_events_clear(zfs_cmd_t *zc) return (0); } +/* + * inputs: + * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END + * zc_cleanup_fd zevent file descriptor + */ +static int +zfs_ioc_events_seek(zfs_cmd_t *zc) +{ + zfs_zevent_t *ze; + minor_t minor; + int error; + + error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); + if (error != 0) + return (error); + + error = zfs_zevent_seek(ze, zc->zc_guid); + zfs_zevent_fd_rele(zc->zc_cleanup_fd); + + return (error); +} + /* * inputs: * 
zc_name name of new filesystem or snapshot @@ -5393,6 +5415,8 @@ zfs_ioctl_init(void) zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); } int From 9b101a73203faecdc6d466fe446d787bc341ee14 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 22 Nov 2013 16:00:39 -0800 Subject: [PATCH 03/11] Clarify zpool_events_next() comment Due to the very poorly chosen argument name 'cleanup_fd' it was completely unclear that this file descriptor is used to track the current cursor location. When the file descriptor is created by opening ZFS_DEV a private cursor is created in the kernel for the returned file descriptor. Subsequent calls to zpool_events_next() and zpool_events_seek() then require the file descriptor as an argument to reposition the cursor. When the file descriptor is closed the kernel state tracking the cursor is destroyed. This patch contains no functional change, it just changes a few variable names and clarifies the documentation. 
Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Issue #2 --- cmd/zpool/zpool_main.c | 10 +++++----- lib/libzfs/libzfs_pool.c | 20 ++++++++++---------- module/zfs/zfs_ioctl.c | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index d095b15a5864..4254f9b4d7c5 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -5455,17 +5455,17 @@ static int zpool_do_events_next(ev_opts_t *opts) { nvlist_t *nvl; - int cleanup_fd, ret, dropped; + int zevent_fd, ret, dropped; - cleanup_fd = open(ZFS_DEV, O_RDWR); - VERIFY(cleanup_fd >= 0); + zevent_fd = open(ZFS_DEV, O_RDWR); + VERIFY(zevent_fd >= 0); if (!opts->scripted) (void) printf(gettext("%-30s %s\n"), "TIME", "CLASS"); while (1) { ret = zpool_events_next(g_zfs, &nvl, &dropped, - !!opts->follow, cleanup_fd); + !!opts->follow, zevent_fd); if (ret || nvl == NULL) break; @@ -5483,7 +5483,7 @@ zpool_do_events_next(ev_opts_t *opts) nvlist_free(nvl); } - VERIFY(0 == close(cleanup_fd)); + VERIFY(0 == close(zevent_fd)); return (ret); } diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 1b8f3b63a82a..2054385b8d35 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3783,25 +3783,25 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) } /* - * Retrieve the next event. If there is a new event available 'nvp' will - * contain a newly allocated nvlist and 'dropped' will be set to the number - * of missed events since the last call to this function. When 'nvp' is - * set to NULL it indicates no new events are available. In either case - * the function returns 0 and it is up to the caller to free 'nvp'. In - * the case of a fatal error the function will return a non-zero value. - * When the function is called in blocking mode it will not return until - * a new event is available. + * Retrieve the next event given the passed 'zevent_fd' file descriptor. 
+ * If there is a new event available 'nvp' will contain a newly allocated + * nvlist and 'dropped' will be set to the number of missed events since + * the last call to this function. When 'nvp' is set to NULL it indicates + * no new events are available. In either case the function returns 0 and + * it is up to the caller to free 'nvp'. In the case of a fatal error the + * function will return a non-zero value. When the function is called in + * blocking mode it will not return until a new event is available. */ int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, - int *dropped, int block, int cleanup_fd) + int *dropped, int block, int zevent_fd) { zfs_cmd_t zc = {"\0"}; int error = 0; *nvp = NULL; *dropped = 0; - zc.zc_cleanup_fd = cleanup_fd; + zc.zc_cleanup_fd = zevent_fd; if (!block) zc.zc_guid = ZEVENT_NONBLOCK; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index cd47790166f5..0dfda1abf77c 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -4860,11 +4860,11 @@ zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) /* * inputs: * zc_guid flags (ZEVENT_NONBLOCK) + * zc_cleanup_fd zevent file descriptor * * outputs: * zc_nvlist_dst next nvlist event * zc_cookie dropped events since last get - * zc_cleanup_fd cleanup-on-exit file descriptor */ static int zfs_ioc_events_next(zfs_cmd_t *zc) From 07917db9908516aa3fd55d39d2c1792aca8bebcd Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Mon, 6 Jan 2014 16:15:45 -0800 Subject: [PATCH 04/11] Add defs for makefile installation dir vars Add macro definitions to AM_CPPFLAGS to propagate makefile installation directory variables for libexecdir, runstatedir, sbindir, and sysconfdir. https://www.gnu.org/software/autoconf/manual/autoconf-2.69/html_node/Installation-Directory-Variables.html A corollary is that you should not use these variables except in makefiles. 
For instance, instead of trying to evaluate datadir in configure and hard-coding it in makefiles using e.g., 'AC_DEFINE_UNQUOTED([DATADIR], ["$datadir"], [Data directory.])', you should add -DDATADIR='$(datadir)' to your makefile's definition of CPPFLAGS (AM_CPPFLAGS if you are also using Automake). The runstatedir directory is for "installing data files which the programs modify while they run, that pertain to one specific machine, and which need not persist longer than the execution of the program". https://www.gnu.org/prep/standards/html_node/Directory-Variables.html It will be defined by autoconf 2.70 or later, and default to "$(localstatedir)/run". http://git.savannah.gnu.org/gitweb/?p=autoconf.git;a=commit;h=a197431414088a417b407b9b20583b2e8f7363bd Signed-off-by: Chris Dunlap Signed-off-by: Brian Behlendorf Issue #2 --- config/Rules.am | 4 ++++ config/user-runstatedir.m4 | 6 ++++++ config/user.m4 | 1 + 3 files changed, 11 insertions(+) create mode 100644 config/user-runstatedir.m4 diff --git a/config/Rules.am b/config/Rules.am index 24f94264ba86..4fb40c4e456a 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -8,3 +8,7 @@ AM_CFLAGS += -fno-strict-aliasing AM_CPPFLAGS = -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT AM_CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64 AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DTEXT_DOMAIN=\"zfs-linux-user\" +AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\" +AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\" +AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\" +AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\" diff --git a/config/user-runstatedir.m4 b/config/user-runstatedir.m4 new file mode 100644 index 000000000000..ded1362c7b22 --- /dev/null +++ b/config/user-runstatedir.m4 @@ -0,0 +1,6 @@ +dnl For backwards compatibility; runstatedir added in autoconf 2.70. 
+AC_DEFUN([ZFS_AC_CONFIG_USER_RUNSTATEDIR], [ + if test "x$runstatedir" = x; then + AC_SUBST([runstatedir], ['${localstatedir}/run']) + fi +]) diff --git a/config/user.m4 b/config/user.m4 index da1cdea3318c..6c127820c94d 100644 --- a/config/user.m4 +++ b/config/user.m4 @@ -12,4 +12,5 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [ ZFS_AC_CONFIG_USER_LIBUUID ZFS_AC_CONFIG_USER_LIBBLKID ZFS_AC_CONFIG_USER_FRAME_LARGER_THAN + ZFS_AC_CONFIG_USER_RUNSTATEDIR ]) From 8c7aa0cfc47578d1d38f80ecb7c66eed7cde5c59 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Wed, 12 Feb 2014 10:30:18 -0800 Subject: [PATCH 05/11] Replace zpool_events_next() "block" parm w/ "flags" zpool_events_next() can be called in blocking mode by specifying a non-zero value for the "block" parameter. However, the design of the ZFS Event Daemon (zed) requires additional functionality from zpool_events_next(). Instead of adding additional arguments to the function, it makes more sense to use flags that can be bitwise-or'd together. This commit replaces the zpool_events_next() int "block" parameter with an unsigned bitwise "flags" parameter. It also defines ZEVENT_NONE to specify the default behavior. Since non-blocking mode can be specified with the existing ZEVENT_NONBLOCK flag, the default behavior becomes blocking mode. This, in effect, inverts the previous use of the "block" parameter. Existing callers of zpool_events_next() have been modified to check for the ZEVENT_NONBLOCK flag. 
Signed-off-by: Chris Dunlap Signed-off-by: Brian Behlendorf Issue #2 --- cmd/zpool/zpool_main.c | 3 ++- include/libzfs.h | 3 ++- include/sys/zfs_ioctl.h | 1 + lib/libzfs/libzfs_pool.c | 9 +++++---- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 4254f9b4d7c5..cc13e3739394 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -5465,7 +5466,7 @@ zpool_do_events_next(ev_opts_t *opts) while (1) { ret = zpool_events_next(g_zfs, &nvl, &dropped, - !!opts->follow, zevent_fd); + (opts->follow ? ZEVENT_NONE : ZEVENT_NONBLOCK), zevent_fd); if (ret || nvl == NULL) break; diff --git a/include/libzfs.h b/include/libzfs.h index cb78f1d62931..5bc8b03ef4b3 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -408,7 +408,8 @@ extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); -extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int); +extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, unsigned, + int); extern int zpool_events_clear(libzfs_handle_t *, int *); extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index c63b16c78d32..0ab095c1ae95 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -259,6 +259,7 @@ typedef struct zinject_record { #define ZINJECT_FLUSH_ARC 0x2 #define ZINJECT_UNLOAD_SPA 0x4 +#define ZEVENT_NONE 0x0 #define ZEVENT_NONBLOCK 0x1 #define ZEVENT_SIZE 1024 diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 2054385b8d35..db1f0d7cf2be 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3790,11 +3790,12 @@ 
zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) * no new events are available. In either case the function returns 0 and * it is up to the caller to free 'nvp'. In the case of a fatal error the * function will return a non-zero value. When the function is called in - * blocking mode it will not return until a new event is available. + * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed), + * it will not return until a new event is available. */ int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, - int *dropped, int block, int zevent_fd) + int *dropped, unsigned flags, int zevent_fd) { zfs_cmd_t zc = {"\0"}; int error = 0; @@ -3803,7 +3804,7 @@ zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, *dropped = 0; zc.zc_cleanup_fd = zevent_fd; - if (!block) + if (flags & ZEVENT_NONBLOCK) zc.zc_guid = ZEVENT_NONBLOCK; if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0) @@ -3818,7 +3819,7 @@ zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, goto out; case ENOENT: /* Blocking error case should not occur */ - if (block) + if (!(flags & ZEVENT_NONBLOCK)) error = zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get event")); From 9e246ac3d8ef9ff8aed86ecf277eea2cae3a79d3 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Tue, 21 Jan 2014 13:30:03 -0800 Subject: [PATCH 06/11] Initial implementation of zed (ZFS Event Daemon) zed monitors ZFS events. When a zevent is posted, zed will run any scripts that have been enabled for the corresponding zevent class. Multiple scripts may be invoked for a given zevent. The zevent nvpairs are passed to the scripts as environment variables. Events are processed synchronously by the single thread, and there is no maximum timeout for script execution. Consequently, a misbehaving script can delay (or forever block) the processing of subsequent zevents. Plans are to address this in future commits. 
Initial scripts have been developed to log events to syslog and send email in response to checksum/data/io errors and resilver.finish/scrub.finish events. By default, email will only be sent if the ZED_EMAIL variable is configured in zed.rc (which is serving as a config file of sorts until a proper configuration file is implemented). Signed-off-by: Chris Dunlap Signed-off-by: Brian Behlendorf Issue #2 --- cmd/Makefile.am | 2 +- cmd/zed/.gitignore | 1 + cmd/zed/Makefile.am | 62 ++ cmd/zed/zed.c | 236 +++++++ cmd/zed/zed.d/all-debug.sh | 17 + cmd/zed/zed.d/all-syslog.sh | 11 + cmd/zed/zed.d/checksum-email.sh | 1 + cmd/zed/zed.d/data-email.sh | 81 +++ cmd/zed/zed.d/generic-email.sh | 59 ++ cmd/zed/zed.d/io-email.sh | 86 +++ cmd/zed/zed.d/resilver.finish-email.sh | 1 + cmd/zed/zed.d/scrub.finish-email.sh | 73 +++ cmd/zed/zed.d/zed.rc | 28 + cmd/zed/zed.h | 70 +++ cmd/zed/zed_conf.c | 673 ++++++++++++++++++++ cmd/zed/zed_conf.h | 71 +++ cmd/zed/zed_event.c | 829 +++++++++++++++++++++++++ cmd/zed/zed_event.h | 41 ++ cmd/zed/zed_exec.c | 207 ++++++ cmd/zed/zed_exec.h | 36 ++ cmd/zed/zed_file.c | 316 ++++++++++ cmd/zed/zed_file.h | 49 ++ cmd/zed/zed_log.c | 171 +++++ cmd/zed/zed_log.h | 48 ++ cmd/zed/zed_strings.c | 200 ++++++ cmd/zed/zed_strings.h | 44 ++ configure.ac | 1 + man/man8/.gitignore | 1 + man/man8/Makefile.am | 23 + man/man8/zed.8.in | 265 ++++++++ rpm/generic/zfs.spec.in | 1 + scripts/common.sh.in | 9 + scripts/zfs.sh | 1 + zfs-script-config.sh.in | 2 + 34 files changed, 3715 insertions(+), 1 deletion(-) create mode 100644 cmd/zed/.gitignore create mode 100644 cmd/zed/Makefile.am create mode 100644 cmd/zed/zed.c create mode 100755 cmd/zed/zed.d/all-debug.sh create mode 100755 cmd/zed/zed.d/all-syslog.sh create mode 120000 cmd/zed/zed.d/checksum-email.sh create mode 100755 cmd/zed/zed.d/data-email.sh create mode 100755 cmd/zed/zed.d/generic-email.sh create mode 100755 cmd/zed/zed.d/io-email.sh create mode 120000 cmd/zed/zed.d/resilver.finish-email.sh create 
mode 100755 cmd/zed/zed.d/scrub.finish-email.sh create mode 100644 cmd/zed/zed.d/zed.rc create mode 100644 cmd/zed/zed.h create mode 100644 cmd/zed/zed_conf.c create mode 100644 cmd/zed/zed_conf.h create mode 100644 cmd/zed/zed_event.c create mode 100644 cmd/zed/zed_event.h create mode 100644 cmd/zed/zed_exec.c create mode 100644 cmd/zed/zed_exec.h create mode 100644 cmd/zed/zed_file.c create mode 100644 cmd/zed/zed_file.h create mode 100644 cmd/zed/zed_log.c create mode 100644 cmd/zed/zed_log.h create mode 100644 cmd/zed/zed_strings.c create mode 100644 cmd/zed/zed_strings.h create mode 100644 man/man8/.gitignore create mode 100644 man/man8/zed.8.in diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 4c3e4bbb2d24..968c6c181a8c 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,2 +1,2 @@ SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest zpios -SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat dbufstat +SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat dbufstat zed diff --git a/cmd/zed/.gitignore b/cmd/zed/.gitignore new file mode 100644 index 000000000000..76557bb6bb3a --- /dev/null +++ b/cmd/zed/.gitignore @@ -0,0 +1 @@ +/zed diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am new file mode 100644 index 000000000000..8e4efe919502 --- /dev/null +++ b/cmd/zed/Makefile.am @@ -0,0 +1,62 @@ +include $(top_srcdir)/config/Rules.am + +DEFAULT_INCLUDES += \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/lib/libspl/include + +sbin_PROGRAMS = zed + +zed_SOURCES = \ + $(top_srcdir)/cmd/zed/zed.c \ + $(top_srcdir)/cmd/zed/zed.h \ + $(top_srcdir)/cmd/zed/zed_conf.c \ + $(top_srcdir)/cmd/zed/zed_conf.h \ + $(top_srcdir)/cmd/zed/zed_event.c \ + $(top_srcdir)/cmd/zed/zed_event.h \ + $(top_srcdir)/cmd/zed/zed_exec.c \ + $(top_srcdir)/cmd/zed/zed_exec.h \ + $(top_srcdir)/cmd/zed/zed_file.c \ + $(top_srcdir)/cmd/zed/zed_file.h \ + $(top_srcdir)/cmd/zed/zed_log.c \ + $(top_srcdir)/cmd/zed/zed_log.h \ + $(top_srcdir)/cmd/zed/zed_strings.c \ + 
$(top_srcdir)/cmd/zed/zed_strings.h + +zed_LDADD = \ + $(top_builddir)/lib/libavl/libavl.la \ + $(top_builddir)/lib/libnvpair/libnvpair.la \ + $(top_builddir)/lib/libspl/libspl.la \ + $(top_builddir)/lib/libzfs/libzfs.la + +zedconfdir = $(sysconfdir)/zfs/zed.d + +dist_zedconf_DATA = \ + $(top_srcdir)/cmd/zed/zed.d/zed.rc + +zedexecdir = $(libexecdir)/zfs/zed.d + +dist_zedexec_SCRIPTS = \ + $(top_srcdir)/cmd/zed/zed.d/all-debug.sh \ + $(top_srcdir)/cmd/zed/zed.d/all-syslog.sh \ + $(top_srcdir)/cmd/zed/zed.d/checksum-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/data-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/generic-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/io-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/resilver.finish-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/scrub.finish-email.sh + +zedconfdefaults = \ + all-syslog.sh \ + checksum-email.sh \ + data-email.sh \ + io-email.sh \ + resilver.finish-email.sh \ + scrub.finish-email.sh + +install-data-local: + $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" + for f in $(zedconfdefaults); do \ + test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ + -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ + ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ + done diff --git a/cmd/zed/zed.c b/cmd/zed/zed.c new file mode 100644 index 000000000000..d2fc0e899717 --- /dev/null +++ b/cmd/zed/zed.c @@ -0,0 +1,236 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zed.h" +#include "zed_conf.h" +#include "zed_event.h" +#include "zed_file.h" +#include "zed_log.h" + +static volatile sig_atomic_t _got_exit = 0; +static volatile sig_atomic_t _got_hup = 0; + +/* + * Signal handler for SIGINT & SIGTERM. + */ +static void +_exit_handler(int signum) +{ + _got_exit = 1; +} + +/* + * Signal handler for SIGHUP. + */ +static void +_hup_handler(int signum) +{ + _got_hup = 1; +} + +/* + * Register signal handlers. + */ +static void +_setup_sig_handlers(void) +{ + struct sigaction sa; + + if (sigemptyset(&sa.sa_mask) < 0) + zed_log_die("Failed to initialize sigset"); + + sa.sa_flags = SA_RESTART; + sa.sa_handler = SIG_IGN; + + if (sigaction(SIGPIPE, &sa, NULL) < 0) + zed_log_die("Failed to ignore SIGPIPE"); + + sa.sa_handler = _exit_handler; + if (sigaction(SIGINT, &sa, NULL) < 0) + zed_log_die("Failed to register SIGINT handler"); + + if (sigaction(SIGTERM, &sa, NULL) < 0) + zed_log_die("Failed to register SIGTERM handler"); + + sa.sa_handler = _hup_handler; + if (sigaction(SIGHUP, &sa, NULL) < 0) + zed_log_die("Failed to register SIGHUP handler"); +} + +/* + * Lock all current and future pages in the virtual memory address space. + * Access to locked pages will never be delayed by a page fault. + * EAGAIN is tested up to max_tries in case this is a transient error. + */ +static void +_lock_memory(void) +{ +#if ! 
_POSIX_MEMLOCK + zed_log_die("Failed to lock memory pages: mlockall() not supported"); + +#else /* _POSIX_MEMLOCK */ + int i = 0; + const int max_tries = 10; + + for (i = 0; i < max_tries; i++) { + if (mlockall(MCL_CURRENT | MCL_FUTURE) == 0) { + zed_log_msg(LOG_INFO, "Locked all pages in memory"); + return; + } + if (errno != EAGAIN) + break; + } + zed_log_die("Failed to lock memory pages: %s", strerror(errno)); + +#endif /* _POSIX_MEMLOCK */ +} + +/* + * Transform the process into a daemon. + */ +static void +_become_daemon(void) +{ + pid_t pid; + int fd; + + pid = fork(); + if (pid < 0) { + zed_log_die("Failed to create child process: %s", + strerror(errno)); + } else if (pid > 0) { + _exit(EXIT_SUCCESS); + } + if (setsid() < 0) + zed_log_die("Failed to create new session: %s", + strerror(errno)); + + pid = fork(); + if (pid < 0) { + zed_log_die("Failed to create grandchild process: %s", + strerror(errno)); + } else if (pid > 0) { + _exit(EXIT_SUCCESS); + } + fd = open("/dev/null", O_RDWR); + + if (fd < 0) + zed_log_die("Failed to open /dev/null: %s", strerror(errno)); + + if (dup2(fd, STDIN_FILENO) < 0) + zed_log_die("Failed to dup /dev/null onto stdin: %s", + strerror(errno)); + + if (dup2(fd, STDOUT_FILENO) < 0) + zed_log_die("Failed to dup /dev/null onto stdout: %s", + strerror(errno)); + + if (dup2(fd, STDERR_FILENO) < 0) + zed_log_die("Failed to dup /dev/null onto stderr: %s", + strerror(errno)); + + if (close(fd) < 0) + zed_log_die("Failed to close /dev/null: %s", strerror(errno)); +} + +/* + * ZFS Event Daemon (ZED). 
+ */ +int +main(int argc, char *argv[]) +{ + struct zed_conf *zcp; + uint64_t saved_eid; + int64_t saved_etime[2]; + + zed_log_init(argv[0]); + zed_log_stderr_open(LOG_NOTICE); + zcp = zed_conf_create(); + zed_conf_parse_opts(zcp, argc, argv); + if (zcp->do_verbose) + zed_log_stderr_open(LOG_INFO); + + if (geteuid() != 0) + zed_log_die("Must be run as root"); + + (void) umask(0); + + _setup_sig_handlers(); + + zed_conf_parse_file(zcp); + + zed_file_close_from(STDERR_FILENO + 1); + + if (chdir("/") < 0) + zed_log_die("Failed to change to root directory"); + + if (zed_conf_scan_dir(zcp) < 0) + exit(EXIT_FAILURE); + + if (zcp->do_memlock) + _lock_memory(); + + if (!zcp->do_foreground) { + _become_daemon(); + zed_log_syslog_open(LOG_DAEMON); + zed_log_stderr_close(); + } + zed_log_msg(LOG_NOTICE, + "ZFS Event Daemon %s-%s", ZFS_META_VERSION, ZFS_META_RELEASE); + + (void) zed_conf_write_pid(zcp); + + if (zed_conf_open_state(zcp) < 0) + exit(EXIT_FAILURE); + + if (zed_conf_read_state(zcp, &saved_eid, saved_etime) < 0) + exit(EXIT_FAILURE); + + zed_event_init(zcp); + zed_event_seek(zcp, saved_eid, saved_etime); + + while (!_got_exit) { + if (_got_hup) { + _got_hup = 0; + (void) zed_conf_scan_dir(zcp); + } + zed_event_service(zcp); + } + zed_log_msg(LOG_NOTICE, "Exiting"); + zed_event_fini(zcp); + zed_conf_destroy(zcp); + zed_log_fini(); + exit(EXIT_SUCCESS); +} diff --git a/cmd/zed/zed.d/all-debug.sh b/cmd/zed/zed.d/all-debug.sh new file mode 100755 index 000000000000..ae64e0a79939 --- /dev/null +++ b/cmd/zed/zed.d/all-debug.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# +# Log all environment variables to ZED_DEBUG_LOG. +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . "${ZED_SCRIPT_DIR}/zed.rc" + +# Override the default umask to restrict access to a newly-created logfile. +umask 077 + +# Append stdout to the logfile after obtaining an advisory lock. 
+exec >> "${ZED_DEBUG_LOG:=/tmp/zed.debug.log}" +flock -x 1 + +printenv | sort +echo + +exit 0 diff --git a/cmd/zed/zed.d/all-syslog.sh b/cmd/zed/zed.d/all-syslog.sh new file mode 100755 index 000000000000..b8bd307a1d0b --- /dev/null +++ b/cmd/zed/zed.d/all-syslog.sh @@ -0,0 +1,11 @@ +#!/bin/sh +# +# Log the zevent via syslog. +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . "${ZED_SCRIPT_DIR}/zed.rc" + +logger -t "${ZED_SYSLOG_TAG:=zed}" -p "${ZED_SYSLOG_PRIORITY:=daemon.notice}" \ + eid="${ZEVENT_EID}" class="${ZEVENT_SUBCLASS}" \ + "${ZEVENT_POOL:+pool=$ZEVENT_POOL}" + +exit 0 diff --git a/cmd/zed/zed.d/checksum-email.sh b/cmd/zed/zed.d/checksum-email.sh new file mode 120000 index 000000000000..f95bec21532a --- /dev/null +++ b/cmd/zed/zed.d/checksum-email.sh @@ -0,0 +1 @@ +io-email.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/data-email.sh b/cmd/zed/zed.d/data-email.sh new file mode 100755 index 000000000000..9f83161494c2 --- /dev/null +++ b/cmd/zed/zed.d/data-email.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# +# Send email to ZED_EMAIL in response to a DATA zevent. +# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given +# class/pool combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool. +# Exit codes: +# 0: email sent +# 1: email failed +# 2: email suppressed +# 3: missing executable +# 4: unsupported event class +# 5: internal error +# State File Format: +# POOL:TIME_OF_LAST_EMAIL +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . "${ZED_SCRIPT_DIR}/zed.rc" + +test -n "${ZEVENT_POOL}" || exit 5 +test -n "${ZEVENT_SUBCLASS}" || exit 5 + +if test "${ZEVENT_SUBCLASS}" != "data"; then \ + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" + exit 4 +fi + +# Only send email if ZED_EMAIL has been configured. +test -n "${ZED_EMAIL}" || exit 2 + +# Ensure requisite executables are installed. 
+if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" not installed + exit 3 +fi + +NAME="zed.${ZEVENT_SUBCLASS}.email" +LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock" +STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state" + +# Obtain lock to ensure mutual exclusion for accessing state. +exec 8> "${LOCKFILE}" +flock -x 8 + +# Query state for last time email was sent for this pool. +TIME_NOW=`date +%s` +TIME_LAST=`egrep "^${ZEVENT_POOL}:" "${STATEFILE}" 2>/dev/null | cut -d: -f2` +if test -n "${TIME_LAST}"; then + TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"` + if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then + exit 2 + fi +fi + +"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \ + "${ZED_EMAIL}" </dev/null > "${STATEFILE}.$$" +echo "${ZEVENT_POOL}:${TIME_NOW}" >> "${STATEFILE}.$$" +mv -f "${STATEFILE}.$$" "${STATEFILE}" + +if test "${MAIL_STATUS}" -ne 0; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" + exit 1 +fi + +exit 0 diff --git a/cmd/zed/zed.d/generic-email.sh b/cmd/zed/zed.d/generic-email.sh new file mode 100755 index 000000000000..16bbdb1974df --- /dev/null +++ b/cmd/zed/zed.d/generic-email.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# +# Send email to ZED_EMAIL in response to a given zevent. +# This is a generic script than can be symlinked to a file in the zed +# enabled-scripts directory in order to have email sent when a particular +# class of zevents occurs. The symlink filename must begin with the zevent +# (sub)class string (eg, "probe_failure-email.sh" for the "probe_failure" +# subclass). Refer to the zed(8) manpage for details. +# Exit codes: +# 0: email sent +# 1: email failed +# 2: email suppressed +# 3: missing executable +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . 
"${ZED_SCRIPT_DIR}/zed.rc" + +# Only send email if ZED_EMAIL has been configured. +test -n "${ZED_EMAIL}" || exit 2 + +# Ensure requisite executables are installed. +if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" not installed + exit 3 +fi + +# Override the default umask to restrict access to the msgbody tmpfile. +umask 077 + +SUBJECT="ZFS ${ZEVENT_SUBCLASS} event" +test -n "${ZEVENT_POOL}" && SUBJECT="${SUBJECT} for ${ZEVENT_POOL}" +SUBJECT="${SUBJECT} on `hostname`" + +MSGBODY="${TMPDIR:=/tmp}/`basename \"$0\"`.$$" +{ + echo "A ZFS ${ZEVENT_SUBCLASS} event has been posted:" + echo + echo " eid: ${ZEVENT_EID}" + echo " host: `hostname`" + echo " time: ${ZEVENT_TIME_STRING}" + test -n "${ZEVENT_VDEV_TYPE}" -a -n "${ZEVENT_VDEV_PATH}" && \ + echo " vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}" + test -n "${ZEVENT_POOL}" -a -x "${ZPOOL}" && \ + "${ZPOOL}" status "${ZEVENT_POOL}" +} > "${MSGBODY}" + +test -f "${MSGBODY}" && "${MAIL}" -s "${SUBJECT}" "${ZED_EMAIL}" < "${MSGBODY}" +MAIL_STATUS=$? +rm -f "${MSGBODY}" + +if test "${MAIL_STATUS}" -ne 0; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" + exit 1 +fi + +exit 0 diff --git a/cmd/zed/zed.d/io-email.sh b/cmd/zed/zed.d/io-email.sh new file mode 100755 index 000000000000..6cfe3c7f7fa9 --- /dev/null +++ b/cmd/zed/zed.d/io-email.sh @@ -0,0 +1,86 @@ +#!/bin/sh +# +# Send email to ZED_EMAIL in response to a CHECKSUM or IO zevent. +# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given +# class/pool/vdev combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool/device. 
+# Exit codes: +# 0: email sent +# 1: email failed +# 2: email suppressed +# 3: missing executable +# 4: unsupported event class +# 5: internal error +# State File Format: +# POOL:VDEV_PATH:TIME_OF_LAST_EMAIL +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . "${ZED_SCRIPT_DIR}/zed.rc" + +test -n "${ZEVENT_POOL}" || exit 5 +test -n "${ZEVENT_SUBCLASS}" || exit 5 +test -n "${ZEVENT_VDEV_PATH}" || exit 5 + +if test "${ZEVENT_SUBCLASS}" != "checksum" \ + -a "${ZEVENT_SUBCLASS}" != "io"; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" + exit 4 +fi + +# Only send email if ZED_EMAIL has been configured. +test -n "${ZED_EMAIL}" || exit 2 + +# Ensure requisite executables are installed. +if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" not installed + exit 3 +fi + +NAME="zed.${ZEVENT_SUBCLASS}.email" +LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock" +STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state" + +# Obtain lock to ensure mutual exclusion for accessing state. +exec 8> "${LOCKFILE}" +flock -x 8 + +# Query state for last time email was sent for this pool/vdev. 
+TIME_NOW=`date +%s` +TIME_LAST=`egrep "^${ZEVENT_POOL}:${ZEVENT_VDEV_PATH}:" "${STATEFILE}" \ + 2>/dev/null | cut -d: -f3` +if test -n "${TIME_LAST}"; then + TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"` + if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then + exit 2 + fi +fi + +"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \ + "${ZED_EMAIL}" </dev/null > "${STATEFILE}.$$" +echo "${ZEVENT_POOL}:${ZEVENT_VDEV_PATH}:${TIME_NOW}" >> "${STATEFILE}.$$" +mv -f "${STATEFILE}.$$" "${STATEFILE}" + +if test "${MAIL_STATUS}" -ne 0; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" + exit 1 +fi + +exit 0 diff --git a/cmd/zed/zed.d/resilver.finish-email.sh b/cmd/zed/zed.d/resilver.finish-email.sh new file mode 120000 index 000000000000..1afad3258d5e --- /dev/null +++ b/cmd/zed/zed.d/resilver.finish-email.sh @@ -0,0 +1 @@ +scrub.finish-email.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/scrub.finish-email.sh b/cmd/zed/zed.d/scrub.finish-email.sh new file mode 100755 index 000000000000..b5ce3f74d732 --- /dev/null +++ b/cmd/zed/zed.d/scrub.finish-email.sh @@ -0,0 +1,73 @@ +#!/bin/sh +# +# Send email to ZED_EMAIL in response to a RESILVER.FINISH or SCRUB.FINISH. +# By default, "zpool status" output will only be included in the email for +# a scrub.finish zevent if the pool is not healthy; to always include its +# output, set ZED_EMAIL_VERBOSE=1. +# Exit codes: +# 0: email sent +# 1: email failed +# 2: email suppressed +# 3: missing executable +# 4: unsupported event class +# 5: internal error +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . 
"${ZED_SCRIPT_DIR}/zed.rc" + +test -n "${ZEVENT_POOL}" || exit 5 +test -n "${ZEVENT_SUBCLASS}" || exit 5 + +if test "${ZEVENT_SUBCLASS}" = "resilver.finish"; then + ACTION="resilvering" +elif test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then + ACTION="scrubbing" +else + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" + exit 4 +fi + +# Only send email if ZED_EMAIL has been configured. +test -n "${ZED_EMAIL}" || exit 2 + +# Ensure requisite executables are installed. +if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${MAIL}" not installed + exit 3 +fi +if ! test -x "${ZPOOL}"; then + logger -t "${ZED_SYSLOG_TAG:=zed}" \ + -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ + `basename "$0"`: "${ZPOOL}" not installed + exit 3 +fi + +# For scrub, suppress email if pool is healthy and verbosity is not enabled. +if test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then + HEALTHY=`"${ZPOOL}" status -x "${ZEVENT_POOL}" | \ + grep "'${ZEVENT_POOL}' is healthy"` + test -n "${HEALTHY}" -a "${ZED_EMAIL_VERBOSE:=0}" = 0 && exit 2 +fi + +"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on `hostname`" \ + "${ZED_EMAIL}" <. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. 
+ */ + +#ifndef ZED_H +#define ZED_H + +/* + * Absolute path for the default zed configuration file. + */ +#define ZED_CONF_FILE SYSCONFDIR "/zfs/zed.conf" + +/* + * Absolute path for the default zed pid file. + */ +#define ZED_PID_FILE RUNSTATEDIR "/zed.pid" + +/* + * Absolute path for the default zed state file. + */ +#define ZED_STATE_FILE RUNSTATEDIR "/zed.state" + +/* + * Absolute path for the default zed script directory. + */ +#define ZED_SCRIPT_DIR SYSCONFDIR "/zfs/zed.d" + +/* + * Reserved for future use. + */ +#define ZED_MAX_EVENTS 0 + +/* + * Reserved for future use. + */ +#define ZED_MIN_EVENTS 0 + +/* + * String prefix for ZED variables passed via environment variables. + */ +#define ZED_VAR_PREFIX "ZED_" + +/* + * String prefix for ZFS event names passed via environment variables. + */ +#define ZEVENT_VAR_PREFIX "ZEVENT_" + +#endif /* !ZED_H */ diff --git a/cmd/zed/zed_conf.c b/cmd/zed/zed_conf.c new file mode 100644 index 000000000000..fb3e552e91c9 --- /dev/null +++ b/cmd/zed/zed_conf.c @@ -0,0 +1,673 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). 
+ * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zed.h" +#include "zed_conf.h" +#include "zed_file.h" +#include "zed_log.h" +#include "zed_strings.h" + +/* + * Return a new configuration with default values. + */ +struct zed_conf * +zed_conf_create(void) +{ + struct zed_conf *zcp; + + zcp = malloc(sizeof (*zcp)); + if (!zcp) + goto nomem; + + memset(zcp, 0, sizeof (*zcp)); + + zcp->syslog_facility = LOG_DAEMON; + zcp->min_events = ZED_MIN_EVENTS; + zcp->max_events = ZED_MAX_EVENTS; + zcp->scripts = NULL; /* created via zed_conf_scan_dir() */ + zcp->state_fd = -1; /* opened via zed_conf_open_state() */ + zcp->zfs_hdl = NULL; /* opened via zed_event_init() */ + zcp->zevent_fd = -1; /* opened via zed_event_init() */ + + if (!(zcp->conf_file = strdup(ZED_CONF_FILE))) + goto nomem; + + if (!(zcp->pid_file = strdup(ZED_PID_FILE))) + goto nomem; + + if (!(zcp->script_dir = strdup(ZED_SCRIPT_DIR))) + goto nomem; + + if (!(zcp->state_file = strdup(ZED_STATE_FILE))) + goto nomem; + + return (zcp); + +nomem: + zed_log_die("Failed to create conf: %s", strerror(errno)); + return (NULL); +} + +/* + * Destroy the configuration [zcp]. + * Note: zfs_hdl & zevent_fd are destroyed via zed_event_fini(). 
+ */ +void +zed_conf_destroy(struct zed_conf *zcp) +{ + if (!zcp) + return; + + if (zcp->state_fd >= 0) { + if (close(zcp->state_fd) < 0) + zed_log_msg(LOG_WARNING, + "Failed to close state file \"%s\": %s", + zcp->state_file, strerror(errno)); + } + if (zcp->pid_file) { + if ((unlink(zcp->pid_file) < 0) && (errno != ENOENT)) + zed_log_msg(LOG_WARNING, + "Failed to remove pid file \"%s\": %s", + zcp->pid_file, strerror(errno)); + } + if (zcp->conf_file) + free(zcp->conf_file); + + if (zcp->pid_file) + free(zcp->pid_file); + + if (zcp->script_dir) + free(zcp->script_dir); + + if (zcp->state_file) + free(zcp->state_file); + + if (zcp->scripts) + zed_strings_destroy(zcp->scripts); + + free(zcp); +} + +/* + * Display command-line help and exit. + * If [got_err] is 0, output to stdout and exit normally; + * otherwise, output to stderr and exit with a failure status. + */ +static void +_zed_conf_display_help(const char *prog, int got_err) +{ + FILE *fp = got_err ? stderr : stdout; + int w1 = 4; /* width of leading whitespace */ + int w2 = 8; /* width of L-justified option field */ + + fprintf(fp, "Usage: %s [OPTION]...\n", (prog ? 
prog : "zed")); + fprintf(fp, "\n"); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-h", + "Display help."); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-L", + "Display license information."); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-V", + "Display version information."); + fprintf(fp, "\n"); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-v", + "Be verbose."); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-f", + "Force daemon to run."); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-F", + "Run daemon in the foreground."); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-M", + "Lock all pages in memory."); + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-Z", + "Zero state file."); + fprintf(fp, "\n"); +#if 0 + fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-c FILE", + "Read configuration from FILE.", ZED_CONF_FILE); +#endif + fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-d DIR", + "Read enabled scripts from DIR.", ZED_SCRIPT_DIR); + fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-p FILE", + "Write daemon's PID to FILE.", ZED_PID_FILE); + fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-s FILE", + "Write daemon's state to FILE.", ZED_STATE_FILE); + fprintf(fp, "\n"); + + exit(got_err ? EXIT_FAILURE : EXIT_SUCCESS); +} + +/* + * Display license information to stdout and exit. + */ +static void +_zed_conf_display_license(void) +{ + const char **pp; + const char *text[] = { + "The ZFS Event Daemon (ZED) is distributed under the terms of the", + " Common Development and Distribution License (CDDL-1.0)", + " .", + "Developed at Lawrence Livermore National Laboratory" + " (LLNL-CODE-403049).", + "Copyright (C) 2013-2014" + " Lawrence Livermore National Security, LLC.", + "", + NULL + }; + + for (pp = text; *pp; pp++) + printf("%s\n", *pp); + + exit(EXIT_SUCCESS); +} + +/* + * Display version information to stdout and exit. 
+ */ +static void +_zed_conf_display_version(void) +{ + printf("%s-%s-%s\n", + ZFS_META_NAME, ZFS_META_VERSION, ZFS_META_RELEASE); + + exit(EXIT_SUCCESS); +} + +/* + * Copy the [path] string to the [resultp] ptr. + * If [path] is not an absolute path, prefix it with the current working dir. + * If [resultp] is non-null, free its existing string before assignment. + */ +static void +_zed_conf_parse_path(char **resultp, const char *path) +{ + char buf[PATH_MAX]; + + assert(resultp != NULL); + assert(path != NULL); + + if (*resultp) + free(*resultp); + + if (path[0] == '/') { + *resultp = strdup(path); + } else if (!getcwd(buf, sizeof (buf))) { + zed_log_die("Failed to get current working dir: %s", + strerror(errno)); + } else if (strlcat(buf, "/", sizeof (buf)) >= sizeof (buf)) { + zed_log_die("Failed to copy path: %s", strerror(ENAMETOOLONG)); + } else if (strlcat(buf, path, sizeof (buf)) >= sizeof (buf)) { + zed_log_die("Failed to copy path: %s", strerror(ENAMETOOLONG)); + } else { + *resultp = strdup(buf); + } + if (!*resultp) + zed_log_die("Failed to copy path: %s", strerror(ENOMEM)); +} + +/* + * Parse the command-line options into the configuration [zcp]. 
+ */ +void +zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv) +{ + const char * const opts = ":hLVc:d:p:s:vfFMZ"; + int opt; + + if (!zcp || !argv || !argv[0]) + zed_log_die("Failed to parse options: Internal error"); + + opterr = 0; /* suppress default getopt err msgs */ + + while ((opt = getopt(argc, argv, opts)) != -1) { + switch (opt) { + case 'h': + _zed_conf_display_help(argv[0], EXIT_SUCCESS); + break; + case 'L': + _zed_conf_display_license(); + break; + case 'V': + _zed_conf_display_version(); + break; + case 'c': + _zed_conf_parse_path(&zcp->conf_file, optarg); + break; + case 'd': + _zed_conf_parse_path(&zcp->script_dir, optarg); + break; + case 'p': + _zed_conf_parse_path(&zcp->pid_file, optarg); + break; + case 's': + _zed_conf_parse_path(&zcp->state_file, optarg); + break; + case 'v': + zcp->do_verbose = 1; + break; + case 'f': + zcp->do_force = 1; + break; + case 'F': + zcp->do_foreground = 1; + break; + case 'M': + zcp->do_memlock = 1; + break; + case 'Z': + zcp->do_zero = 1; + break; + case '?': + default: + if (optopt == '?') + _zed_conf_display_help(argv[0], EXIT_SUCCESS); + + fprintf(stderr, "%s: %s '-%c'\n\n", argv[0], + "Invalid option", optopt); + _zed_conf_display_help(argv[0], EXIT_FAILURE); + break; + } + } +} + +/* + * Parse the configuration file into the configuration [zcp]. + * FIXME: Not yet implemented. + */ +void +zed_conf_parse_file(struct zed_conf *zcp) +{ + if (!zcp) + zed_log_die("Failed to parse config: %s", strerror(EINVAL)); +} + +/* + * Scan the [zcp] script_dir for files to exec based on the event class. + * Files must be executable by user, but not writable by group or other. + * Dotfiles are ignored. + * Return 0 on success with an updated set of scripts, + * or -1 on error with errno set. + * FIXME: Check if script_dir and all parent dirs are secure. 
+ */
+int
+zed_conf_scan_dir(struct zed_conf *zcp)
+{
+	zed_strings_t *scripts;
+	DIR *dirp;
+	struct dirent *direntp;
+	char pathname[PATH_MAX];
+	struct stat st;
+	int n;
+
+	if (!zcp) {
+		errno = EINVAL;
+		zed_log_msg(LOG_ERR, "Failed to scan script dir: %s",
+		    strerror(errno));
+		return (-1);
+	}
+	/* Build the new set aside so a failed scan keeps the old one. */
+	scripts = zed_strings_create();
+	if (!scripts) {
+		errno = ENOMEM;
+		zed_log_msg(LOG_WARNING, "Failed to scan dir \"%s\": %s",
+		    zcp->script_dir, strerror(errno));
+		return (-1);
+	}
+	dirp = opendir(zcp->script_dir);
+	if (!dirp) {
+		int errno_bak = errno;
+		zed_log_msg(LOG_WARNING, "Failed to open dir \"%s\": %s",
+		    zcp->script_dir, strerror(errno));
+		zed_strings_destroy(scripts);
+		errno = errno_bak;
+		return (-1);
+	}
+	while ((direntp = readdir(dirp))) {
+		if (direntp->d_name[0] == '.')
+			continue;
+
+		n = snprintf(pathname, sizeof (pathname),
+		    "%s/%s", zcp->script_dir, direntp->d_name);
+		if ((n < 0) || ((size_t) n >= sizeof (pathname))) {
+			zed_log_msg(LOG_WARNING, "Failed to stat \"%s\": %s",
+			    direntp->d_name, strerror(ENAMETOOLONG));
+			continue;
+		}
+		if (stat(pathname, &st) < 0) {
+			zed_log_msg(LOG_WARNING, "Failed to stat \"%s\": %s",
+			    pathname, strerror(errno));
+			continue;
+		}
+		if (!S_ISREG(st.st_mode)) {
+			zed_log_msg(LOG_INFO,
+			    "Ignoring \"%s\": not a regular file",
+			    direntp->d_name);
+			continue;
+		}
+		if ((st.st_uid != 0) && !zcp->do_force) {
+			zed_log_msg(LOG_NOTICE,
+			    "Ignoring \"%s\": not owned by root",
+			    direntp->d_name);
+			continue;
+		}
+		if (!(st.st_mode & S_IXUSR)) {
+			zed_log_msg(LOG_INFO,
+			    "Ignoring \"%s\": not executable by user",
+			    direntp->d_name);
+			continue;
+		}
+		/*
+		 * The write-permission tests must use a logical AND: a
+		 * bitwise AND of the mode bit with the 0/1 result of
+		 * !do_force is always zero, which disabled both checks.
+		 */
+		if ((st.st_mode & S_IWGRP) && !zcp->do_force) {
+			zed_log_msg(LOG_NOTICE,
+			    "Ignoring \"%s\": writable by group",
+			    direntp->d_name);
+			continue;
+		}
+		if ((st.st_mode & S_IWOTH) && !zcp->do_force) {
+			zed_log_msg(LOG_NOTICE,
+			    "Ignoring \"%s\": writable by other",
+			    direntp->d_name);
+			continue;
+		}
+		if (zed_strings_add(scripts, direntp->d_name) < 0) {
+			zed_log_msg(LOG_WARNING,
+			    "Failed to register \"%s\": %s",
+			    direntp->d_name, strerror(errno));
+			continue;
+		}
+		if (zcp->do_verbose)
+			zed_log_msg(LOG_INFO,
+			    "Registered script \"%s\"", direntp->d_name);
+	}
+	if (closedir(dirp) < 0) {
+		int errno_bak = errno;
+		zed_log_msg(LOG_WARNING, "Failed to close dir \"%s\": %s",
+		    zcp->script_dir, strerror(errno));
+		zed_strings_destroy(scripts);
+		errno = errno_bak;
+		return (-1);
+	}
+	/* Scan succeeded: replace any previously registered set. */
+	if (zcp->scripts)
+		zed_strings_destroy(zcp->scripts);
+
+	zcp->scripts = scripts;
+	return (0);
+}
+
+/*
+ * Write the PID file specified in [zcp].
+ * The parent directory of the pid file is created if needed, any existing
+ * pid file is unlinked first, and a partially written file is removed
+ * on failure.
+ * Return 0 on success, -1 on error.
+ * XXX: This must be called after fork()ing to become a daemon.
+ */
+int
+zed_conf_write_pid(struct zed_conf *zcp)
+{
+	char dirbuf[PATH_MAX];
+	size_t n;
+	char *p;
+	mode_t mask;
+	FILE *fp;
+
+	if (!zcp || !zcp->pid_file) {
+		errno = EINVAL;
+		zed_log_msg(LOG_ERR, "Failed to write pid file: %s",
+		    strerror(errno));
+		return (-1);
+	}
+	n = strlcpy(dirbuf, zcp->pid_file, sizeof (dirbuf));
+	if (n >= sizeof (dirbuf)) {
+		errno = ENAMETOOLONG;
+		zed_log_msg(LOG_WARNING, "Failed to write pid file: %s",
+		    strerror(errno));
+		return (-1);
+	}
+	/* Strip the final path component to obtain the directory. */
+	p = strrchr(dirbuf, '/');
+	if (p)
+		*p = '\0';
+
+	/* FIXME: Replace with mkdirp()? (lib/libspl/mkdirp.c) */
+	if (zed_file_create_dirs(dirbuf) < 0)
+		return (-1);
+
+	(void) unlink(zcp->pid_file);
+
+	/* Temporarily forbid group/other write on the new pid file. */
+	mask = umask(0);
+	umask(mask | 022);
+	fp = fopen(zcp->pid_file, "w");
+	umask(mask);
+
+	if (!fp) {
+		zed_log_msg(LOG_WARNING, "Failed to open pid file \"%s\": %s",
+		    zcp->pid_file, strerror(errno));
+	} else if (fprintf(fp, "%d\n", (int) getpid()) < 0) {
+		/* fprintf() reports failure as any negative value. */
+		zed_log_msg(LOG_WARNING, "Failed to write pid file \"%s\": %s",
+		    zcp->pid_file, strerror(errno));
+		/* Log before closing so errno is intact; don't leak fp. */
+		(void) fclose(fp);
+	} else if (fclose(fp) == EOF) {
+		zed_log_msg(LOG_WARNING, "Failed to close pid file \"%s\": %s",
+		    zcp->pid_file, strerror(errno));
+	} else {
+		return (0);
+	}
+	(void) unlink(zcp->pid_file);
+	return (-1);
+}
+
+/*
+ * Open and lock the [zcp] state_file.
+ * Return 0 on success, -1 on error. + * FIXME: If state_file exists, verify ownership & permissions. + * FIXME: Move lock to pid_file instead. + */ +int +zed_conf_open_state(struct zed_conf *zcp) +{ + char dirbuf[PATH_MAX]; + int n; + char *p; + int rv; + + if (!zcp || !zcp->state_file) { + errno = EINVAL; + zed_log_msg(LOG_ERR, "Failed to open state file: %s", + strerror(errno)); + return (-1); + } + n = strlcpy(dirbuf, zcp->state_file, sizeof (dirbuf)); + if (n >= sizeof (dirbuf)) { + errno = ENAMETOOLONG; + zed_log_msg(LOG_WARNING, "Failed to open state file: %s", + strerror(errno)); + return (-1); + } + p = strrchr(dirbuf, '/'); + if (p) + *p = '\0'; + + /* FIXME: Replace with mkdirp()? (lib/libspl/mkdirp.c) */ + if (zed_file_create_dirs(dirbuf) < 0) + return (-1); + + if (zcp->state_fd >= 0) { + if (close(zcp->state_fd) < 0) { + zed_log_msg(LOG_WARNING, + "Failed to close state file \"%s\": %s", + zcp->state_file, strerror(errno)); + return (-1); + } + } + if (zcp->do_zero) + (void) unlink(zcp->state_file); + + zcp->state_fd = open(zcp->state_file, + (O_RDWR | O_CREAT), (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)); + if (zcp->state_fd < 0) { + zed_log_msg(LOG_WARNING, "Failed to open state file \"%s\": %s", + zcp->state_file, strerror(errno)); + return (-1); + } + rv = zed_file_lock(zcp->state_fd); + if (rv < 0) { + zed_log_msg(LOG_WARNING, "Failed to lock state file \"%s\": %s", + zcp->state_file, strerror(errno)); + return (-1); + } + if (rv > 0) { + pid_t pid = zed_file_is_locked(zcp->state_fd); + if (pid < 0) { + zed_log_msg(LOG_WARNING, + "Failed to test lock on state file \"%s\"", + zcp->state_file); + } else if (pid > 0) { + zed_log_msg(LOG_WARNING, + "Found pid %d bound to state file \"%s\"", + pid, zcp->state_file); + } else { + zed_log_msg(LOG_WARNING, + "Inconsistent lock state on state file \"%s\"", + zcp->state_file); + } + return (-1); + } + return (0); +} + +/* + * Read the opened [zcp] state_file to obtain the eid & etime + * of the last event 
processed.
+ * Write the state from the last event to the [eidp] & [etime] args
+ * passed by reference.
+ * Note that etime[] is an array of size 2.
+ * If the state file is empty, [eidp] and both [etime] elements are zeroed.
+ * Return 0 on success, -1 on error.
+ */
+int
+zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[])
+{
+	ssize_t len;
+	struct iovec iov[3];
+	ssize_t n;
+
+	if (!zcp || !eidp || !etime) {
+		errno = EINVAL;
+		zed_log_msg(LOG_ERR,
+		    "Failed to read state file: %s", strerror(errno));
+		return (-1);
+	}
+	if (lseek(zcp->state_fd, 0, SEEK_SET) == (off_t) -1) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to reposition state file offset: %s",
+		    strerror(errno));
+		return (-1);
+	}
+	/* The record is the eid followed by the two time elements. */
+	len = 0;
+	iov[0].iov_base = eidp;
+	len += iov[0].iov_len = sizeof (*eidp);
+	iov[1].iov_base = &etime[0];
+	len += iov[1].iov_len = sizeof (etime[0]);
+	iov[2].iov_base = &etime[1];
+	len += iov[2].iov_len = sizeof (etime[1]);
+
+	n = readv(zcp->state_fd, iov, 3);
+	if (n == 0) {
+		/* Empty state file: hand back a fully defined zero state. */
+		*eidp = 0;
+		etime[0] = 0;
+		etime[1] = 0;
+	} else if (n < 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to read state file \"%s\": %s",
+		    zcp->state_file, strerror(errno));
+		return (-1);
+	} else if (n != len) {
+		errno = EIO;
+		/* Cast: "%d" expects int, but n and len are ssize_t. */
+		zed_log_msg(LOG_WARNING,
+		    "Failed to read state file \"%s\": Read %d of %d bytes",
+		    zcp->state_file, (int) n, (int) len);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Write the [eid] & [etime] of the last processed event to the opened
+ * [zcp] state_file.
+ * Note that etime[] is an array of size 2.
+ * Return 0 on success, -1 on error.
+ */
+int
+zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[])
+{
+	ssize_t len;
+	struct iovec iov[3];
+	ssize_t n;
+
+	/* etime is dereferenced below, so reject NULL like read_state. */
+	if (!zcp || !etime) {
+		errno = EINVAL;
+		zed_log_msg(LOG_ERR,
+		    "Failed to write state file: %s", strerror(errno));
+		return (-1);
+	}
+	/* Always overwrite the single record at the start of the file. */
+	if (lseek(zcp->state_fd, 0, SEEK_SET) == (off_t) -1) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to reposition state file offset: %s",
+		    strerror(errno));
+		return (-1);
+	}
+	len = 0;
+	iov[0].iov_base = &eid;
+	len += iov[0].iov_len = sizeof (eid);
+	iov[1].iov_base = &etime[0];
+	len += iov[1].iov_len = sizeof (etime[0]);
+	iov[2].iov_base = &etime[1];
+	len += iov[2].iov_len = sizeof (etime[1]);
+
+	n = writev(zcp->state_fd, iov, 3);
+	if (n < 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to write state file \"%s\": %s",
+		    zcp->state_file, strerror(errno));
+		return (-1);
+	}
+	if (n != len) {
+		errno = EIO;
+		/* Cast: "%d" expects int, but n and len are ssize_t. */
+		zed_log_msg(LOG_WARNING,
+		    "Failed to write state file \"%s\": Wrote %d of %d bytes",
+		    zcp->state_file, (int) n, (int) len);
+		return (-1);
+	}
+	/* Flush the record to stable storage before reporting success. */
+	if (fdatasync(zcp->state_fd) < 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to sync state file \"%s\": %s",
+		    zcp->state_file, strerror(errno));
+		return (-1);
+	}
+	return (0);
+}
diff --git a/cmd/zed/zed_conf.h b/cmd/zed/zed_conf.h
new file mode 100644
index 000000000000..51b98ea7693a
--- /dev/null
+++ b/cmd/zed/zed_conf.h
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license from the top-level
+ * OPENSOLARIS.LICENSE or .
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each file
+ * and include the License file from the top-level OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#ifndef ZED_CONF_H +#define ZED_CONF_H + +#include +#include +#include "zed_strings.h" + +struct zed_conf { + unsigned do_force:1; /* true if force enabled */ + unsigned do_foreground:1; /* true if run in foreground */ + unsigned do_memlock:1; /* true if locking memory */ + unsigned do_verbose:1; /* true if verbosity enabled */ + unsigned do_zero:1; /* true if zeroing state */ + int syslog_facility; /* syslog facility value */ + int min_events; /* RESERVED FOR FUTURE USE */ + int max_events; /* RESERVED FOR FUTURE USE */ + char *conf_file; /* abs path to config file */ + char *pid_file; /* abs path to pid file */ + char *script_dir; /* abs path to script dir */ + zed_strings_t *scripts; /* names of enabled scripts */ + char *state_file; /* abs path to state file */ + int state_fd; /* fd to state file */ + libzfs_handle_t *zfs_hdl; /* handle to libzfs */ + int zevent_fd; /* fd for access to zevents */ +}; + +struct zed_conf *zed_conf_create(void); + +void zed_conf_destroy(struct zed_conf *zcp); + +void zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv); + +void zed_conf_parse_file(struct zed_conf *zcp); + +int zed_conf_scan_dir(struct zed_conf *zcp); + +int zed_conf_write_pid(struct zed_conf *zcp); + +int zed_conf_open_state(struct zed_conf *zcp); + +int zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[]); + +int zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[]); + +#endif /* !ZED_CONF_H */ diff --git a/cmd/zed/zed_event.c b/cmd/zed/zed_event.c new file mode 100644 index 000000000000..e504aefb9e96 --- /dev/null 
+++ b/cmd/zed/zed_event.c @@ -0,0 +1,829 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#include +#include +#include +#include /* FIXME: Replace with libzfs_core. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "zed.h" +#include "zed_conf.h" +#include "zed_exec.h" +#include "zed_file.h" +#include "zed_log.h" +#include "zed_strings.h" + +/* + * Open the libzfs interface. + */ +void +zed_event_init(struct zed_conf *zcp) +{ + if (!zcp) + zed_log_die("Failed zed_event_init: %s", strerror(EINVAL)); + + zcp->zfs_hdl = libzfs_init(); + if (!zcp->zfs_hdl) + zed_log_die("Failed to initialize libzfs"); + + zcp->zevent_fd = open(ZFS_DEV, O_RDWR); + if (zcp->zevent_fd < 0) + zed_log_die("Failed to open \"%s\": %s", + ZFS_DEV, strerror(errno)); +} + +/* + * Close the libzfs interface. 
+ */
+void
+zed_event_fini(struct zed_conf *zcp)
+{
+	if (!zcp)
+		zed_log_die("Failed zed_event_fini: %s", strerror(EINVAL));
+
+	if (zcp->zevent_fd >= 0) {
+		if (close(zcp->zevent_fd) < 0)
+			zed_log_msg(LOG_WARNING, "Failed to close \"%s\": %s",
+			    ZFS_DEV, strerror(errno));
+
+		zcp->zevent_fd = -1;
+	}
+	if (zcp->zfs_hdl) {
+		libzfs_fini(zcp->zfs_hdl);
+		zcp->zfs_hdl = NULL;
+	}
+}
+
+/*
+ * Seek to the event specified by [saved_eid] and [saved_etime].
+ * This protects against processing a given event more than once.
+ * Return 0 upon a successful seek to the specified event, or -1 otherwise.
+ * A zevent is considered to be uniquely specified by its (eid,time) tuple.
+ * The unsigned 64b eid is set to 1 when the kernel module is loaded, and
+ * incremented by 1 for each new event.  Since the state file can persist
+ * across a kernel module reload, the time must be checked to ensure a match.
+ * If no matching event is found, the cursor is rewound to the start of the
+ * event list so that nothing is skipped.
+ */
+int
+zed_event_seek(struct zed_conf *zcp, uint64_t saved_eid, int64_t saved_etime[])
+{
+	uint64_t eid;
+	int found;
+	nvlist_t *nvl;
+	int n_dropped;
+	int64_t *etime;
+	uint_t nelem;
+	int rv;
+
+	if (!zcp) {
+		errno = EINVAL;
+		zed_log_msg(LOG_ERR, "Failed to seek zevent: %s",
+		    strerror(errno));
+		return (-1);
+	}
+	eid = 0;
+	found = 0;
+	while ((eid < saved_eid) && !found) {
+		rv = zpool_events_next(zcp->zfs_hdl, &nvl, &n_dropped,
+		    ZEVENT_NONBLOCK, zcp->zevent_fd);
+
+		if ((rv != 0) || !nvl)
+			break;
+
+		if (n_dropped > 0) {
+			zed_log_msg(LOG_WARNING, "Missed %d events", n_dropped);
+			/*
+			 * FIXME: Increase max size of event nvlist in
+			 * /sys/module/zfs/parameters/zfs_zevent_len_max ?
+			 */
+		}
+		if (nvlist_lookup_uint64(nvl, "eid", &eid) != 0) {
+			zed_log_msg(LOG_WARNING, "Failed to lookup zevent eid");
+		} else if (nvlist_lookup_int64_array(nvl, "time",
+		    &etime, &nelem) != 0) {
+			zed_log_msg(LOG_WARNING,
+			    "Failed to lookup zevent time (eid=%llu)", eid);
+		} else if (nelem != 2) {
+			zed_log_msg(LOG_WARNING,
+			    "Failed to lookup zevent time (eid=%llu, nelem=%u)",
+			    eid, nelem);
+		} else if ((eid != saved_eid) ||
+		    (etime[0] != saved_etime[0]) ||
+		    (etime[1] != saved_etime[1])) {
+			/* no-op */
+		} else {
+			found = 1;
+		}
+		/*
+		 * An nvlist owns its nvpairs, so it must be released with
+		 * nvlist_free(); plain free() would leak every pair.
+		 */
+		nvlist_free(nvl);
+	}
+	if (!found && (saved_eid > 0)) {
+		if (zpool_events_seek(zcp->zfs_hdl, ZEVENT_SEEK_START,
+		    zcp->zevent_fd) < 0)
+			zed_log_msg(LOG_WARNING, "Failed to seek to eid=0");
+		else
+			eid = 0;
+	}
+	zed_log_msg(LOG_NOTICE, "Processing events since eid=%llu", eid);
+	return (found ? 0 : -1);
+}
+
+/*
+ * The _zed_event_convert_*_array() helpers below share one contract:
+ * format the elements of the array nvpair [nvp] into [buf] (capacity
+ * [buflen]) separated by single spaces, with no trailing space.
+ * Return the length of the resulting string, or -1 if it does not fit,
+ * in which case [buf] is set to the empty string.
+ * The trailing separator is only stripped when something was written, so
+ * a call with [buflen] <= 0 never stores in front of [buf].
+ */
+static int
+_zed_event_convert_int8_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	int8_t *i8p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_int8_array(nvp, &i8p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%d ", i8p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_uint8_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	uint8_t *u8p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_uint8_array(nvp, &u8p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%u ", u8p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_int16_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	int16_t *i16p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_int16_array(nvp, &i16p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%d ", i16p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_uint16_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	uint16_t *u16p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_uint16_array(nvp, &u16p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%u ", u16p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_int32_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	int32_t *i32p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_int32_array(nvp, &i32p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%d ", i32p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_uint32_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	uint32_t *u32p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_uint32_array(nvp, &u32p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%u ", u32p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_int64_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	int64_t *i64p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_int64_array(nvp, &i64p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		/* "%lld" takes a signed argument; cast accordingly. */
+		n = snprintf(p, buflen, "%lld ", (longlong_t) i64p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+/*
+ * As above, but each element is formatted with the caller-supplied [fmt],
+ * which must consume one unsigned long long and end with a space.
+ */
+static int
+_zed_event_convert_uint64_array(char *buf, int buflen, nvpair_t *nvp,
+    const char *fmt)
+{
+	uint64_t *u64p;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_uint64_array(nvp, &u64p, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, fmt, (u_longlong_t) u64p[i]);
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+static int
+_zed_event_convert_string_array(char *buf, int buflen, nvpair_t *nvp)
+{
+	char **strp;
+	uint_t nelem;
+	uint_t i;
+	char *p;
+	int n;
+
+	assert(buf != NULL);
+
+	(void) nvpair_value_string_array(nvp, &strp, &nelem);
+	for (i = 0, p = buf; (i < nelem) && (buflen > 0); i++) {
+		n = snprintf(p, buflen, "%s ", strp[i] ? strp[i] : "");
+		if ((n < 0) || (n >= buflen)) {
+			*buf = '\0';
+			return (-1);
+		}
+		p += n;
+		buflen -= n;
+	}
+	if (p > buf)
+		*--p = '\0';
+
+	return (p - buf);
+}
+
+/*
+ * Return non-zero if nvpair [name] should be formatted in hex; o/w, return 0.
+ * A name qualifies when it ends with one of the listed suffixes.  The tail
+ * of the name is compared directly, so a name that also contains a suffix
+ * earlier on (e.g. "a_guid_b_guid") is still recognized.
+ */
+static int
+_zed_event_value_is_hex(const char *name)
+{
+	const char *hex_suffix[] = {
+		"_guid",
+		"_guids",
+		NULL
+	};
+	const char **pp;
+	size_t namelen;
+	size_t suffixlen;
+
+	if (!name)
+		return (0);
+
+	namelen = strlen(name);
+	for (pp = hex_suffix; *pp; pp++) {
+		suffixlen = strlen(*pp);
+		if ((namelen >= suffixlen) &&
+		    (strcmp(name + namelen - suffixlen, *pp) == 0))
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Convert the nvpair [nvp] to a string which is added to the environment
+ * of the child process.
+ * On failure a warning is logged and the nvpair is skipped; nothing is
+ * returned to the caller.
+ * FIXME: Refactor with cmd/zpool/zpool_main.c:zpool_do_events_nvprint()?
+ *
+ * The variable is named ZEVENT_VAR_PREFIX followed by the nvpair name
+ * folded to uppercase, with every non-alphanumeric character replaced
+ * by an underscore.
+ */
+static void
+_zed_event_add_nvpair(uint64_t eid, zed_strings_t *zsp, nvpair_t *nvp)
+{
+	const char *name;
+	data_type_t type;
+	char buf[4096];
+	int buflen;
+	int n;
+	char *p;
+	const char *q;
+	const char *fmt;
+
+	boolean_t b;
+	double d;
+	uint8_t i8;
+	uint16_t i16;
+	uint32_t i32;
+	uint64_t i64;
+	char *str;
+
+	assert(zsp != NULL);
+	assert(nvp != NULL);
+
+	name = nvpair_name(nvp);
+	type = nvpair_type(nvp);
+	buflen = sizeof (buf);
+
+	/* Copy NAME prefix for ZED zevent namespace. */
+	n = strlcpy(buf, ZEVENT_VAR_PREFIX, sizeof (buf));
+	if (n >= sizeof (buf)) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to convert nvpair \"%s\" for eid=%llu: %s",
+		    name, eid, "Exceeded buffer size");
+		return;
+	}
+	buflen -= n;
+	p = buf + n;
+
+	/*
+	 * Convert NAME to alphanumeric uppercase.  The <ctype.h> functions
+	 * require a value representable as unsigned char.
+	 */
+	for (q = name; *q && (buflen > 0); q++) {
+		*p++ = isalnum((unsigned char) *q) ?
+		    toupper((unsigned char) *q) : '_';
+		buflen--;
+	}
+
+	/*
+	 * Separate NAME from VALUE.  Room is needed for both the '=' and
+	 * the terminating NUL; give up if the name did not fit, rather
+	 * than storing the NUL one byte past the end of the buffer.
+	 */
+	if (*q || (buflen < 2)) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to convert nvpair \"%s\" for eid=%llu: %s",
+		    name, eid, "Exceeded buffer size");
+		return;
+	}
+	*p++ = '=';
+	buflen--;
+	*p = '\0';
+
+	/*
+	 * Convert VALUE.  The signed scalar types are read into unsigned
+	 * storage and cast back, so that negative values keep their sign.
+	 */
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		n = snprintf(p, buflen, "%s", "1");
+		break;
+	case DATA_TYPE_BOOLEAN_VALUE:
+		(void) nvpair_value_boolean_value(nvp, &b);
+		n = snprintf(p, buflen, "%s", b ? "1" : "0");
+		break;
+	case DATA_TYPE_BYTE:
+		(void) nvpair_value_byte(nvp, &i8);
+		n = snprintf(p, buflen, "%d", i8);
+		break;
+	case DATA_TYPE_INT8:
+		(void) nvpair_value_int8(nvp, (int8_t *) &i8);
+		n = snprintf(p, buflen, "%d", (int8_t) i8);
+		break;
+	case DATA_TYPE_UINT8:
+		(void) nvpair_value_uint8(nvp, &i8);
+		n = snprintf(p, buflen, "%u", i8);
+		break;
+	case DATA_TYPE_INT16:
+		(void) nvpair_value_int16(nvp, (int16_t *) &i16);
+		n = snprintf(p, buflen, "%d", (int16_t) i16);
+		break;
+	case DATA_TYPE_UINT16:
+		(void) nvpair_value_uint16(nvp, &i16);
+		n = snprintf(p, buflen, "%u", i16);
+		break;
+	case DATA_TYPE_INT32:
+		(void) nvpair_value_int32(nvp, (int32_t *) &i32);
+		n = snprintf(p, buflen, "%d", (int32_t) i32);
+		break;
+	case DATA_TYPE_UINT32:
+		(void) nvpair_value_uint32(nvp, &i32);
+		n = snprintf(p, buflen, "%u", i32);
+		break;
+	case DATA_TYPE_INT64:
+		(void) nvpair_value_int64(nvp, (int64_t *) &i64);
+		n = snprintf(p, buflen, "%lld", (longlong_t) i64);
+		break;
+	case DATA_TYPE_UINT64:
+		(void) nvpair_value_uint64(nvp, &i64);
+		fmt = _zed_event_value_is_hex(name) ? "0x%.16llX" : "%llu";
+		n = snprintf(p, buflen, fmt, (u_longlong_t) i64);
+		break;
+	case DATA_TYPE_DOUBLE:
+		(void) nvpair_value_double(nvp, &d);
+		n = snprintf(p, buflen, "%g", d);
+		break;
+	case DATA_TYPE_HRTIME:
+		(void) nvpair_value_hrtime(nvp, (hrtime_t *) &i64);
+		n = snprintf(p, buflen, "%llu", (u_longlong_t) i64);
+		break;
+	case DATA_TYPE_NVLIST:
+		/* FIXME */
+		n = snprintf(p, buflen, "%s", "_NOT_IMPLEMENTED_");
+		break;
+	case DATA_TYPE_STRING:
+		(void) nvpair_value_string(nvp, &str);
+		n = snprintf(p, buflen, "%s", (str ? str : ""));
+		break;
+	case DATA_TYPE_BOOLEAN_ARRAY:
+		/* FIXME */
+		n = snprintf(p, buflen, "%s", "_NOT_IMPLEMENTED_");
+		break;
+	case DATA_TYPE_BYTE_ARRAY:
+		/* FIXME */
+		n = snprintf(p, buflen, "%s", "_NOT_IMPLEMENTED_");
+		break;
+	case DATA_TYPE_INT8_ARRAY:
+		n = _zed_event_convert_int8_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_UINT8_ARRAY:
+		n = _zed_event_convert_uint8_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_INT16_ARRAY:
+		n = _zed_event_convert_int16_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_UINT16_ARRAY:
+		n = _zed_event_convert_uint16_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_INT32_ARRAY:
+		n = _zed_event_convert_int32_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_UINT32_ARRAY:
+		n = _zed_event_convert_uint32_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_INT64_ARRAY:
+		n = _zed_event_convert_int64_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_UINT64_ARRAY:
+		fmt = _zed_event_value_is_hex(name) ? "0x%.16llX " : "%llu ";
+		n = _zed_event_convert_uint64_array(p, buflen, nvp, fmt);
+		break;
+	case DATA_TYPE_STRING_ARRAY:
+		n = _zed_event_convert_string_array(p, buflen, nvp);
+		break;
+	case DATA_TYPE_NVLIST_ARRAY:
+		/* FIXME */
+		n = snprintf(p, buflen, "%s", "_NOT_IMPLEMENTED_");
+		break;
+	default:
+		zed_log_msg(LOG_WARNING,
+		    "Failed to convert nvpair \"%s\" for eid=%llu: "
+		    "Unrecognized type=%u", name, eid, (unsigned int) type);
+		return;
+	}
+	/*
+	 * The value was written into the [buflen] bytes that follow the
+	 * name, so that is the limit a truncated result must be checked
+	 * against (not the size of the whole buffer).
+	 */
+	if ((n < 0) || (n >= buflen)) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to convert nvpair \"%s\" for eid=%llu: %s",
+		    name, eid, "Exceeded buffer size");
+		return;
+	}
+	if (zed_strings_add(zsp, buf) < 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to convert nvpair \"%s\" for eid=%llu: %s",
+		    name, eid, strerror(ENOMEM));
+		return;
+	}
+}
+
+/*
+ * Add the environment variable specified by the format string [fmt].
+ * The formatted result must have the form NAME=VALUE and fit in the
+ * local buffer; otherwise a warning is logged and nothing is added.
+ */
+static void
+_zed_event_add_var(uint64_t eid, zed_strings_t *zsp, const char *fmt, ...)
+{
+	char buf[4096];
+	va_list vargs;
+	int n;
+	const char *p;
+	int namelen;
+
+	assert(zsp != NULL);
+	assert(fmt != NULL);
+
+	va_start(vargs, fmt);
+	n = vsnprintf(buf, sizeof (buf), fmt, vargs);
+	va_end(vargs);
+
+	/* After an output error the buffer contents are unspecified. */
+	if (n < 0)
+		buf[0] = '\0';
+
+	p = strchr(buf, '=');
+	/* The "%.*s" precision argument must be an int, not a size_t. */
+	namelen = (p) ? (int) (p - buf) : (int) strlen(buf);
+
+	if ((n < 0) || (n >= sizeof (buf))) {
+		zed_log_msg(LOG_WARNING, "Failed to add %.*s for eid=%llu: %s",
+		    namelen, buf, eid, "Exceeded buffer size");
+	} else if (!p) {
+		zed_log_msg(LOG_WARNING, "Failed to add %.*s for eid=%llu: %s",
+		    namelen, buf, eid, "Missing assignment");
+	} else if (zed_strings_add(zsp, buf) < 0) {
+		zed_log_msg(LOG_WARNING, "Failed to add %.*s for eid=%llu: %s",
+		    namelen, buf, eid, strerror(ENOMEM));
+	}
+}
+
+/*
+ * Restrict various environment variables to safe and sane values
+ * when constructing the environment for the child process.
+ * Reference: Secure Programming Cookbook by Viega & Messier, Section 1.1.
+ */
+static void
+_zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp)
+{
+	const char *env_restrict[] = {
+		"IFS= \t\n",
+		"PATH=" _PATH_STDPATH,
+		"ZDB=" SBINDIR "/zdb",
+		"ZED=" SBINDIR "/zed",
+		"ZFS=" SBINDIR "/zfs",
+		"ZINJECT=" SBINDIR "/zinject",
+		"ZPOOL=" SBINDIR "/zpool",
+		"ZFS_ALIAS=" ZFS_META_ALIAS,
+		"ZFS_VERSION=" ZFS_META_VERSION,
+		"ZFS_RELEASE=" ZFS_META_RELEASE,
+		NULL
+	};
+	const char **pp;
+
+	assert(zsp != NULL);
+
+	/* Pass each entry through "%s" so it is never used as a format. */
+	for (pp = env_restrict; *pp; pp++) {
+		_zed_event_add_var(eid, zsp, "%s", *pp);
+	}
+}
+
+/*
+ * Preserve specified variables from the parent environment
+ * when constructing the environment for the child process.
+ * Reference: Secure Programming Cookbook by Viega & Messier, Section 1.1.
+ */
+static void
+_zed_event_add_env_preserve(uint64_t eid, zed_strings_t *zsp)
+{
+	const char *env_preserve[] = {
+		"TZ",
+		NULL
+	};
+	const char *value;
+	int i;
+
+	assert(zsp != NULL);
+
+	/* Copy over only those listed variables that are actually set. */
+	for (i = 0; env_preserve[i] != NULL; i++) {
+		value = getenv(env_preserve[i]);
+		if (value != NULL)
+			_zed_event_add_var(eid, zsp, "%s=%s",
+			    env_preserve[i], value);
+	}
+}
+
+/*
+ * Compute the "subclass" of [class] by skipping its first 3 dot-separated
+ * components (which seem to always be either "ereport.fs.zfs" or
+ * "resource.fs.zfs").
+ * Return a pointer into [class] just past the third dot, or NULL if
+ * [class] is NULL or holds fewer than three dots.
+ */
+static const char *
+_zed_event_get_subclass(const char *class)
+{
+	const char *cursor = class;
+	int remaining = 3;
+
+	if (cursor == NULL)
+		return (NULL);
+
+	while (remaining-- > 0) {
+		const char *dot = strchr(cursor, '.');
+
+		if (dot == NULL)
+			return (NULL);
+
+		cursor = dot + 1;
+	}
+	return (cursor);
+}
+
+/*
+ * Convert the zevent time from a 2-element array of 64b integers
+ * into a more convenient form:
+ * TIME_SECS is the second component of the time.
+ * TIME_NSECS is the nanosecond component of the time.
+ * TIME_STRING is an almost-RFC3339-compliant string representation.
+ */
+static void
+_zed_event_add_time_strings(uint64_t eid, zed_strings_t *zsp, int64_t etime[])
+{
+	struct tm *stp;
+	char buf[32];
+	time_t secs;
+
+	assert(zsp != NULL);
+	assert(etime != NULL);
+
+	_zed_event_add_var(eid, zsp, "%s%s=%lld",
+	    ZEVENT_VAR_PREFIX, "TIME_SECS", (long long int) etime[0]);
+	_zed_event_add_var(eid, zsp, "%s%s=%lld",
+	    ZEVENT_VAR_PREFIX, "TIME_NSECS", (long long int) etime[1]);
+
+	/*
+	 * Copy the seconds into a real time_t instead of casting the
+	 * pointer: time_t need not have the same width as int64_t.
+	 */
+	secs = (time_t) etime[0];
+
+	if (!(stp = localtime(&secs))) {
+		zed_log_msg(LOG_WARNING, "Failed to add %s%s for eid=%llu: %s",
+		    ZEVENT_VAR_PREFIX, "TIME_STRING", eid, "localtime error");
+	} else if (!strftime(buf, sizeof (buf), "%Y-%m-%d %H:%M:%S%z", stp)) {
+		zed_log_msg(LOG_WARNING, "Failed to add %s%s for eid=%llu: %s",
+		    ZEVENT_VAR_PREFIX, "TIME_STRING", eid, "strftime error");
+	} else {
+		_zed_event_add_var(eid, zsp, "%s%s=%s",
+		    ZEVENT_VAR_PREFIX, "TIME_STRING", buf);
+	}
+}
+
+/*
+ * Service the next zevent, blocking until one is available.
+ * The event's nvpairs and a set of fixed variables are collected into an
+ * environment, the matching scripts are run via zed_exec_process(), and
+ * the event's (eid, time) is then handed to zed_conf_write_state().
+ * An event lacking an "eid", a 2-element "time" or a "class" is logged
+ * and skipped.
+ */
+void
+zed_event_service(struct zed_conf *zcp)
+{
+	nvlist_t *nvl;
+	nvpair_t *nvp;
+	int n_dropped;
+	zed_strings_t *zsp;
+	uint64_t eid;
+	int64_t *etime;
+	uint_t nelem;
+	char *class;
+	const char *subclass;
+	int rv;
+
+	if (!zcp) {
+		errno = EINVAL;
+		zed_log_msg(LOG_ERR, "Failed to service zevent: %s",
+		    strerror(errno));
+		return;
+	}
+	rv = zpool_events_next(zcp->zfs_hdl, &nvl, &n_dropped, ZEVENT_NONE,
+	    zcp->zevent_fd);
+
+	if ((rv != 0) || !nvl)
+		return;
+
+	if (n_dropped > 0) {
+		zed_log_msg(LOG_WARNING, "Missed %d events", n_dropped);
+		/*
+		 * FIXME: Increase max size of event nvlist in
+		 * /sys/module/zfs/parameters/zfs_zevent_len_max ?
+		 */
+	}
+	if (nvlist_lookup_uint64(nvl, "eid", &eid) != 0) {
+		zed_log_msg(LOG_WARNING, "Failed to lookup zevent eid");
+	} else if (nvlist_lookup_int64_array(
+	    nvl, "time", &etime, &nelem) != 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to lookup zevent time (eid=%llu)", eid);
+	} else if (nelem != 2) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to lookup zevent time (eid=%llu, nelem=%u)",
+		    eid, nelem);
+	} else if (nvlist_lookup_string(nvl, "class", &class) != 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to lookup zevent class (eid=%llu)", eid);
+	} else {
+		zsp = zed_strings_create();
+
+		/* One environment variable per nvpair of the event. */
+		nvp = NULL;
+		while ((nvp = nvlist_next_nvpair(nvl, nvp)))
+			_zed_event_add_nvpair(eid, zsp, nvp);
+
+		_zed_event_add_env_restrict(eid, zsp);
+		_zed_event_add_env_preserve(eid, zsp);
+
+		_zed_event_add_var(eid, zsp, "%s%s=%d",
+		    ZED_VAR_PREFIX, "PID", (int) getpid());
+		_zed_event_add_var(eid, zsp, "%s%s=%s",
+		    ZED_VAR_PREFIX, "SCRIPT_DIR", zcp->script_dir);
+
+		/* Fall back to the full class if it has no subclass part. */
+		subclass = _zed_event_get_subclass(class);
+		_zed_event_add_var(eid, zsp, "%s%s=%s",
+		    ZEVENT_VAR_PREFIX, "SUBCLASS",
+		    (subclass ? subclass : class));
+		_zed_event_add_time_strings(eid, zsp, etime);
+
+		zed_exec_process(eid, class, subclass,
+		    zcp->script_dir, zcp->scripts, zsp, zcp->zevent_fd);
+
+		zed_conf_write_state(zcp, eid, etime);
+
+		zed_strings_destroy(zsp);
+	}
+	nvlist_free(nvl);
+}
diff --git a/cmd/zed/zed_event.h b/cmd/zed/zed_event.h
new file mode 100644
index 000000000000..71b3a2babed3
--- /dev/null
+++ b/cmd/zed/zed_event.h
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license from the top-level
+ * OPENSOLARIS.LICENSE or .
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#ifndef ZED_EVENT_H +#define ZED_EVENT_H + +#include + +void zed_event_init(struct zed_conf *zcp); + +void zed_event_fini(struct zed_conf *zcp); + +int zed_event_seek(struct zed_conf *zcp, uint64_t saved_eid, + int64_t saved_etime[]); + +void zed_event_service(struct zed_conf *zcp); + +#endif /* !ZED_EVENT_H */ diff --git a/cmd/zed/zed_exec.c b/cmd/zed/zed_exec.c new file mode 100644 index 000000000000..f461b78408ac --- /dev/null +++ b/cmd/zed/zed_exec.c @@ -0,0 +1,207 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "zed_file.h"
+#include "zed_log.h"
+#include "zed_strings.h"
+
+#define	ZEVENT_FILENO	3
+
+/*
+ * Build an environment array suitable for execve() from the NAME=VALUE
+ * strings held in container [zsp].
+ * The NULL-terminated pointer table and the string data it points to
+ * share a single allocation, so one free() of the returned pointer
+ * releases everything.
+ * Return the newly-allocated environment, or NULL on error.
+ */
+static char **
+_zed_exec_create_env(zed_strings_t *zsp)
+{
+	int n_slots;
+	int total;
+	int size;
+	char *block;
+	char **envp;
+	char *dst;
+	const char *src;
+
+	/* First pass: size the pointer table plus all string bytes. */
+	n_slots = zed_strings_count(zsp) + 1;
+	total = n_slots * sizeof (char *);
+	src = zed_strings_first(zsp);
+	while (src) {
+		total += strlen(src) + 1;
+		src = zed_strings_next(zsp);
+	}
+
+	block = malloc(total);
+	if (block == NULL)
+		return (NULL);
+
+	/* Second pass: copy each string in behind the pointer table. */
+	envp = (char **) block;
+	dst = block + (n_slots * sizeof (char *));
+	for (src = zed_strings_first(zsp); src; src = zed_strings_next(zsp)) {
+		size = strlen(src) + 1;
+		*envp++ = dst;
+		memcpy(dst, src, size);
+		dst += size;
+	}
+	*envp = NULL;
+	assert(block + total == dst);
+	return ((char **) block);
+}
+
+/*
+ * Fork a child process to handle event [eid].  The program [prog]
+ * in directory [dir] is executed with the environment [env].
+ * The file descriptor [zfd] is the zevent_fd used to track the
+ * current cursor location within the zevent nvlist.
+ *
+ * The parent blocks until the child terminates and logs how it ended.
+ * In the child, stdin/stdout/stderr are redirected to /dev/null, [zfd]
+ * is duplicated onto ZEVENT_FILENO, every higher descriptor is closed,
+ * and the exit status is 127 if the exec fails.
+ */
+static void
+_zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
+    char *env[], int zfd)
+{
+	char path[PATH_MAX];
+	int n;
+	pid_t pid;
+	int fd;
+	pid_t wpid;
+	int status;
+
+	assert(dir != NULL);
+	assert(prog != NULL);
+	assert(env != NULL);
+	assert(zfd >= 0);
+
+	n = snprintf(path, sizeof (path), "%s/%s", dir, prog);
+	if ((n < 0) || (n >= sizeof (path))) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to fork \"%s\" for eid=%llu: %s",
+		    prog, eid, strerror(ENAMETOOLONG));
+		return;
+	}
+	pid = fork();
+	if (pid < 0) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to fork \"%s\" for eid=%llu: %s",
+		    prog, eid, strerror(errno));
+		return;
+	} else if (pid == 0) {
+		(void) umask(022);
+		/* Best effort: a failed open leaves the stdio fds as is. */
+		fd = open("/dev/null", O_RDWR);
+		(void) dup2(fd, STDIN_FILENO);
+		(void) dup2(fd, STDOUT_FILENO);
+		(void) dup2(fd, STDERR_FILENO);
+		(void) dup2(zfd, ZEVENT_FILENO);
+		zed_file_close_from(ZEVENT_FILENO + 1);
+		/*
+		 * The argument list of a variadic exec call must end with
+		 * a null pointer of type (char *); a bare NULL may be an
+		 * int-sized 0.
+		 */
+		execle(path, prog, (char *) NULL, env);
+		_exit(127);
+	} else {
+		zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
+		    prog, eid, pid);
+		/* FIXME: Timeout rogue child processes with sigalarm? */
+restart:
+		wpid = waitpid(pid, &status, 0);
+		if (wpid == (pid_t) -1) {
+			if (errno == EINTR)
+				goto restart;
+			zed_log_msg(LOG_WARNING,
+			    "Failed to wait for \"%s\" eid=%llu pid=%d",
+			    prog, eid, pid);
+		} else if (WIFEXITED(status)) {
+			zed_log_msg(LOG_INFO,
+			    "Finished \"%s\" eid=%llu pid=%d exit=%d",
+			    prog, eid, pid, WEXITSTATUS(status));
+		} else if (WIFSIGNALED(status)) {
+			zed_log_msg(LOG_INFO,
+			    "Finished \"%s\" eid=%llu pid=%d sig=%d/%s",
+			    prog, eid, pid, WTERMSIG(status),
+			    strsignal(WTERMSIG(status)));
+		} else {
+			/* One argument per conversion: pid was missing. */
+			zed_log_msg(LOG_INFO,
+			    "Finished \"%s\" eid=%llu pid=%d status=0x%X",
+			    prog, eid, pid, (unsigned int) status);
+		}
+	}
+}
+
+/*
+ * Process the event [eid] by synchronously invoking all scripts with a
+ * matching class prefix.
+ * Each executable in [scripts] from the directory [dir] is matched against
+ * the event's [class], [subclass], and the "all" class (which matches
+ * all events).  Every script with a matching class prefix is invoked.
+ * The NAME=VALUE strings in [envs] will be passed to the script as
+ * environment variables.
+ * The file descriptor [zfd] is the zevent_fd used to track the
+ * current cursor location within the zevent nvlist.
+ * Return 0 on success, -1 on error (a missing argument, or failure to
+ * allocate the environment, in which case no script is run).
+ */
+int
+zed_exec_process(uint64_t eid, const char *class, const char *subclass,
+    const char *dir, zed_strings_t *scripts, zed_strings_t *envs, int zfd)
+{
+	const char *class_strings[4];
+	const char *allclass = "all";
+	const char **csp;
+	const char *s;
+	char **e;
+	int n;
+
+	if (!dir || !scripts || !envs || zfd < 0)
+		return (-1);
+
+	/* Collect the prefixes to match; [class]/[subclass] may be NULL. */
+	csp = class_strings;
+
+	if (class)
+		*csp++ = class;
+
+	if (subclass)
+		*csp++ = subclass;
+
+	if (allclass)
+		*csp++ = allclass;
+
+	*csp = NULL;
+
+	/*
+	 * Do not hand a NULL environment to the fork helper: it asserts
+	 * that the environment is non-NULL.
+	 */
+	e = _zed_exec_create_env(envs);
+	if (!e) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to create environment for eid=%llu: %s",
+		    eid, strerror(ENOMEM));
+		return (-1);
+	}
+
+	for (s = zed_strings_first(scripts); s; s = zed_strings_next(scripts)) {
+		for (csp = class_strings; *csp; csp++) {
+			n = strlen(*csp);
+			/*
+			 * The prefix must be followed by a non-letter so
+			 * that, e.g., "all" does not match "allocate".
+			 */
+			if ((strncmp(s, *csp, n) == 0) &&
+			    !isalpha((unsigned char) s[n]))
+				_zed_exec_fork_child(eid, dir, s, e, zfd);
+		}
+	}
+	free(e);
+	return (0);
+}
diff --git a/cmd/zed/zed_exec.h b/cmd/zed/zed_exec.h
new file mode 100644
index 000000000000..52bdc12a8938
--- /dev/null
+++ b/cmd/zed/zed_exec.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license from the top-level
+ * OPENSOLARIS.LICENSE or .
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#ifndef ZED_EXEC_H +#define ZED_EXEC_H + +#include + +int zed_exec_process(uint64_t eid, const char *class, const char *subclass, + const char *dir, zed_strings_t *scripts, zed_strings_t *envs, + int zevent_fd); + +#endif /* !ZED_EXEC_H */ diff --git a/cmd/zed/zed_file.c b/cmd/zed/zed_file.c new file mode 100644 index 000000000000..d73e64976f65 --- /dev/null +++ b/cmd/zed/zed_file.c @@ -0,0 +1,316 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "zed_log.h"
+
+/*
+ * Read up to [n] bytes from [fd] into [buf], restarting reads that are
+ * interrupted by a signal and continuing after short reads.
+ * Return the number of bytes read, 0 on EOF, or -1 on error.
+ */
+ssize_t
+zed_file_read_n(int fd, void *buf, size_t n)
+{
+	unsigned char *dst = buf;
+	size_t remaining = n;
+	ssize_t got;
+
+	while (remaining > 0) {
+		got = read(fd, dst, remaining);
+		if (got == 0)
+			break;
+
+		if (got < 0) {
+			if (errno == EINTR)
+				continue;
+
+			return (-1);
+		}
+		remaining -= got;
+		dst += got;
+	}
+	return (n - remaining);
+}
+
+/*
+ * Write [n] bytes from [buf] out to [fd], restarting writes that are
+ * interrupted by a signal and continuing after short writes.
+ * Return the number of bytes written, or -1 on error.
+ */
+ssize_t
+zed_file_write_n(int fd, void *buf, size_t n)
+{
+	const unsigned char *src = buf;
+	size_t remaining = n;
+	ssize_t put;
+
+	while (remaining > 0) {
+		put = write(fd, src, remaining);
+		if (put >= 0) {
+			remaining -= put;
+			src += put;
+		} else if (errno != EINTR) {
+			return (-1);
+		}
+	}
+	return (n);
+}
+
+/*
+ * Request an exclusive advisory lock covering the whole file open on
+ * [fd], without waiting for it.
+ * Return 0 on success, 1 if a conflicting lock is held by another process,
+ * or -1 on error (with errno set).
+ */
+int
+zed_file_lock(int fd)
+{
+	struct flock request;
+
+	if (fd < 0) {
+		errno = EBADF;
+		return (-1);
+	}
+	/* Start 0 with length 0 means "through the end of the file". */
+	request.l_whence = SEEK_SET;
+	request.l_start = 0;
+	request.l_len = 0;
+	request.l_type = F_WRLCK;
+
+	if (fcntl(fd, F_SETLK, &request) >= 0)
+		return (0);
+
+	/* These two errno values report that the lock is already held. */
+	return (((errno == EACCES) || (errno == EAGAIN)) ? 1 : -1);
+}
+
+/*
+ * Release an advisory lock held on the open file descriptor [fd].
+ * Return 0 on success, or -1 on error (with errno set).
+ */
+int
+zed_file_unlock(int fd)
+{
+	struct flock request;
+
+	if (fd < 0) {
+		errno = EBADF;
+		return (-1);
+	}
+	request.l_whence = SEEK_SET;
+	request.l_start = 0;
+	request.l_len = 0;
+	request.l_type = F_UNLCK;
+
+	return ((fcntl(fd, F_SETLK, &request) < 0) ? -1 : 0);
+}
+
+/*
+ * Probe whether an exclusive advisory lock on the whole file open on
+ * [fd] could be obtained, without actually taking it.
+ * Return 0 if the file is not locked, >0 for the pid of another process
+ * holding a conflicting lock, or -1 on error (with errno set).
+ */
+pid_t
+zed_file_is_locked(int fd)
+{
+	struct flock probe;
+
+	if (fd < 0) {
+		errno = EBADF;
+		return (-1);
+	}
+	probe.l_whence = SEEK_SET;
+	probe.l_start = 0;
+	probe.l_len = 0;
+	probe.l_type = F_WRLCK;
+
+	if (fcntl(fd, F_GETLK, &probe) < 0)
+		return (-1);
+
+	/* F_GETLK rewrites l_type to F_UNLCK when nothing conflicts. */
+	return ((probe.l_type == F_UNLCK) ? 0 : probe.l_pid);
+}
+
+/*
+ * Close every file descriptor from [lowfd] up to (but excluding) the hard
+ * RLIMIT_NOFILE limit, or up to 256 if that limit is unknown or infinite.
+ * Errors from close() are ignored, and errno is left as it was on entry.
+ */
+void
+zed_file_close_from(int lowfd)
+{
+	const int maxfd_def = 256;
+	int saved_errno = errno;
+	struct rlimit rl;
+	int limit = maxfd_def;
+	int fd;
+
+	if ((getrlimit(RLIMIT_NOFILE, &rl) >= 0) &&
+	    (rl.rlim_max != RLIM_INFINITY))
+		limit = rl.rlim_max;
+
+	for (fd = lowfd; fd < limit; fd++)
+		(void) close(fd);
+
+	errno = saved_errno;
+}
+
+/*
+ * Set the CLOEXEC flag on file descriptor [fd] so it will be automatically
+ * closed upon successful execution of one of the exec functions.
+ * The descriptor's other flags are preserved.
+ * Return 0 on success, or -1 on error.
+ * FIXME: No longer needed?
+ */
+int
+zed_file_close_on_exec(int fd)
+{
+	int flags;
+
+	if (fd < 0) {
+		errno = EBADF;
+		return (-1);
+	}
+	flags = fcntl(fd, F_GETFD);
+	if (flags == -1)
+		return (-1);
+
+	if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Create the directory [dir_name] and any missing parent directories.
+ * Directories will be created with permissions 0755 modified by the umask.
+ * [dir_name] must be a non-empty absolute path shorter than PATH_MAX.
+ * Path components that already exist as directories are accepted.
+ * Return 0 on success, or -1 on error.
+ * FIXME: Deprecate in favor of mkdirp(). (lib/libspl/mkdirp.c)
+ */
+int
+zed_file_create_dirs(const char *dir_name)
+{
+	struct stat st;
+	char dir_buf[PATH_MAX];
+	mode_t dir_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+	size_t len;
+	char *p;
+
+	if ((dir_name == NULL) || (dir_name[0] == '\0')) {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to create directory: no directory specified");
+		errno = EINVAL;
+		return (-1);
+	}
+	if (dir_name[0] != '/') {
+		zed_log_msg(LOG_WARNING,
+		    "Failed to create directory \"%s\": not absolute path",
+		    dir_name);
+		errno = EINVAL;
+		return (-1);
+	}
+	/* Check if directory already exists. */
+	if (stat(dir_name, &st) == 0) {
+		if (S_ISDIR(st.st_mode))
+			return (0);
+
+		errno = EEXIST;
+		zed_log_msg(LOG_WARNING,
+		    "Failed to create directory \"%s\": %s",
+		    dir_name, strerror(errno));
+		return (-1);
+	}
+	/* Create copy for modification. */
+	len = strlen(dir_name);
+	if (len >= sizeof (dir_buf)) {
+		errno = ENAMETOOLONG;
+		zed_log_msg(LOG_WARNING,
+		    "Failed to create directory \"%s\": %s",
+		    dir_name, strerror(errno));
+		return (-1);
+	}
+	/* The length was checked above, so this includes the NUL. */
+	memcpy(dir_buf, dir_name, len + 1);
+
+	/* Remove trailing slashes. */
+	p = dir_buf + len - 1;
+	while ((p > dir_buf) && (*p == '/'))
+		*p-- = '\0';
+
+	/* Process directory components starting from the root dir. */
+	p = dir_buf;
+
+	while (1) {
+
+		/* Skip over adjacent slashes. */
+		while (*p == '/')
+			p++;
+
+		/*
+		 * Advance to the next path component, and temporarily
+		 * terminate the string there so that dir_buf names the
+		 * path up to and including the current component.
+		 */
+		p = strchr(p, '/');
+		if (p != NULL)
+			*p = '\0';
+
+		/* Create directory. */
+		if (mkdir(dir_buf, dir_mode) < 0) {
+
+			int mkdir_errno = errno;
+
+			/*
+			 * EEXIST is fine as long as the existing entry is
+			 * a directory (the usual case for parent
+			 * directories); anything else is a failure.
+			 */
+			if ((mkdir_errno != EEXIST) ||
+			    (stat(dir_buf, &st) < 0) ||
+			    (!S_ISDIR(st.st_mode))) {
+				zed_log_msg(LOG_WARNING,
+				    "Failed to create directory \"%s\": %s",
+				    dir_buf, strerror(mkdir_errno));
+				return (-1);
+			}
+		}
+		if (p == NULL)
+			break;
+
+		/* Restore the separator and move on to the next one. */
+		*p++ = '/';
+	}
+	return (0);
+}
diff --git a/cmd/zed/zed_file.h b/cmd/zed/zed_file.h
new file mode 100644
index 000000000000..6a31dc3d628b
--- /dev/null
+++ b/cmd/zed/zed_file.h
@@ -0,0 +1,49 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license from the top-level
+ * OPENSOLARIS.LICENSE or .
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each file
+ * and include the License file from the top-level OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
+ * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
+ */ + +#ifndef ZED_FILE_H +#define ZED_FILE_H + +#include +#include + +ssize_t zed_file_read_n(int fd, void *buf, size_t n); + +ssize_t zed_file_write_n(int fd, void *buf, size_t n); + +int zed_file_lock(int fd); + +int zed_file_unlock(int fd); + +pid_t zed_file_is_locked(int fd); + +void zed_file_close_from(int fd); + +int zed_file_close_on_exec(int fd); + +int zed_file_create_dirs(const char *dir_name); + +#endif /* !ZED_FILE_H */ diff --git a/cmd/zed/zed_log.c b/cmd/zed/zed_log.c new file mode 100644 index 000000000000..bc432bc212bd --- /dev/null +++ b/cmd/zed/zed_log.c @@ -0,0 +1,171 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#include +#include +#include +#include +#include +#include +#include "zed_log.h" + +#define ZED_LOG_MAX_ID_LEN 64 +#define ZED_LOG_MAX_LOG_LEN 1024 + +static struct { + unsigned do_stderr:1; + unsigned do_syslog:1; + int level; + char id[ZED_LOG_MAX_ID_LEN]; +} _ctx; + +void +zed_log_init(const char *identity) +{ + const char *p; + + if (identity) { + p = (p = strrchr(identity, '/')) ? 
p + 1 : identity; + strlcpy(_ctx.id, p, sizeof (_ctx.id)); + } else { + _ctx.id[0] = '\0'; + } +} + +void +zed_log_fini() +{ + if (_ctx.do_syslog) { + closelog(); + } +} + +void +zed_log_stderr_open(int level) +{ + _ctx.do_stderr = 1; + _ctx.level = level; +} + +void +zed_log_stderr_close(void) +{ + _ctx.do_stderr = 0; +} + +void +zed_log_syslog_open(int facility) +{ + const char *identity; + + _ctx.do_syslog = 1; + identity = (_ctx.id[0] == '\0') ? NULL : _ctx.id; + openlog(identity, LOG_NDELAY, facility); +} + +void +zed_log_syslog_close(void) +{ + _ctx.do_syslog = 0; + closelog(); +} + +static void +_zed_log_aux(int priority, const char *fmt, va_list vargs) +{ + char buf[ZED_LOG_MAX_LOG_LEN]; + char *syslogp; + char *p; + int len; + int n; + + assert(fmt != NULL); + + syslogp = NULL; + p = buf; + len = sizeof (buf); + + if (_ctx.id[0] != '\0') { + n = snprintf(p, len, "%s: ", _ctx.id); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } else { + p += n; + len -= n; + } + } + if ((len > 0) && fmt) { + syslogp = p; + n = vsnprintf(p, len, fmt, vargs); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } else { + p += n; + len -= n; + } + } + *p = '\0'; + + if (_ctx.do_syslog && syslogp) + syslog(priority, "%s", syslogp); + + if (_ctx.do_stderr && priority <= _ctx.level) + fprintf(stderr, "%s\n", buf); +} + +/* + * Log a message at the given [priority] level specified by the printf-style + * format string [fmt]. + */ +void +zed_log_msg(int priority, const char *fmt, ...) +{ + va_list vargs; + + if (fmt) { + va_start(vargs, fmt); + _zed_log_aux(priority, fmt, vargs); + va_end(vargs); + } +} + +/* + * Log a fatal error message specified by the printf-style format string [fmt]. + */ +void +zed_log_die(const char *fmt, ...) 
+{ + va_list vargs; + + if (fmt) { + va_start(vargs, fmt); + _zed_log_aux(LOG_ERR, fmt, vargs); + va_end(vargs); + } + exit(EXIT_FAILURE); +} diff --git a/cmd/zed/zed_log.h b/cmd/zed/zed_log.h new file mode 100644 index 000000000000..7ae4549fe87d --- /dev/null +++ b/cmd/zed/zed_log.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. 
+ */ + +#ifndef ZED_LOG_H +#define ZED_LOG_H + +#include + +void zed_log_init(const char *identity); + +void zed_log_fini(void); + +void zed_log_stderr_open(int level); + +void zed_log_stderr_close(void); + +void zed_log_syslog_open(int facility); + +void zed_log_syslog_close(void); + +void zed_log_msg(int priority, const char *fmt, ...); + +void zed_log_die(const char *fmt, ...); + +#endif /* !ZED_LOG_H */ diff --git a/cmd/zed/zed_strings.c b/cmd/zed/zed_strings.c new file mode 100644 index 000000000000..05a374055805 --- /dev/null +++ b/cmd/zed/zed_strings.c @@ -0,0 +1,200 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license from the top-level + * OPENSOLARIS.LICENSE or . + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each file + * and include the License file from the top-level OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "zed_strings.h" + +struct zed_strings { + avl_tree_t tree; + avl_node_t *iteratorp; +}; + +struct zed_strings_node { + avl_node_t node; + char string[]; +}; + +typedef struct zed_strings_node zed_strings_node_t; + +/* + * Compare zed_strings_node_t nodes [x1] and [x2]. + * As required for the AVL tree, return exactly + * -1 for <, 0 for ==, and +1 for >. 
+ */ +static int +_zed_strings_node_compare(const void *x1, const void *x2) +{ + const char *s1; + const char *s2; + int rv; + + assert(x1 != NULL); + assert(x2 != NULL); + + s1 = ((const zed_strings_node_t *) x1)->string; + assert(s1 != NULL); + s2 = ((const zed_strings_node_t *) x2)->string; + assert(s2 != NULL); + rv = strcmp(s1, s2); + + if (rv < 0) + return (-1); + + if (rv > 0) + return (1); + + return (0); +} + +/* + * Return a new string container, or NULL on error. + */ +zed_strings_t * +zed_strings_create(void) +{ + zed_strings_t *zsp; + + zsp = malloc(sizeof (*zsp)); + if (!zsp) + return (NULL); + + memset(zsp, 0, sizeof (*zsp)); + avl_create(&zsp->tree, _zed_strings_node_compare, + sizeof (zed_strings_node_t), offsetof(zed_strings_node_t, node)); + + zsp->iteratorp = NULL; + return (zsp); +} + +/* + * Destroy the string container [zsp] and all strings within. + */ +void +zed_strings_destroy(zed_strings_t *zsp) +{ + void *cookie; + zed_strings_node_t *np; + + if (!zsp) + return; + + cookie = NULL; + while ((np = avl_destroy_nodes(&zsp->tree, &cookie))) + free(np); + + avl_destroy(&zsp->tree); + free(zsp); +} + +/* + * Add a copy of the string [s] to the container [zsp]. + * Return 0 on success, or -1 on error. + * FIXME: Handle dup strings. + */ +int +zed_strings_add(zed_strings_t *zsp, const char *s) +{ + size_t len; + zed_strings_node_t *np; + + if (!zsp || !s) { + errno = EINVAL; + return (-1); + } + len = sizeof (zed_strings_node_t) + strlen(s) + 1; + np = malloc(len); + if (!np) + return (-1); + + memset(np, 0, len); + assert((char *) np->string + strlen(s) < (char *) np + len); + (void) strcpy(np->string, s); + avl_add(&zsp->tree, np); + return (0); +} + +/* + * Return the first string in container [zsp]. + * Return NULL if there are no strings, or on error. + * This can be called multiple times to re-traverse [zsp]. + * XXX: Not thread-safe. 
+ */
+const char *
+zed_strings_first(zed_strings_t *zsp)
+{
+	if (!zsp) {
+		errno = EINVAL;
+		return (NULL);
+	}
+	zsp->iteratorp = avl_first(&zsp->tree);	/* (re)start the traversal */
+	if (!zsp->iteratorp)
+		return (NULL);
+
+	return (((zed_strings_node_t *) zsp->iteratorp)->string);
+
+}
+
+/*
+ * Return the next string in container [zsp].
+ * Return NULL after the last string, or on error.
+ * This must be called after zed_strings_first().
+ * XXX: Not thread-safe.
+ */
+const char *
+zed_strings_next(zed_strings_t *zsp)
+{
+	if (!zsp) {
+		errno = EINVAL;
+		return (NULL);
+	}
+	if (!zsp->iteratorp)		/* not started, or already at the end */
+		return (NULL);
+
+	zsp->iteratorp = AVL_NEXT(&zsp->tree, zsp->iteratorp);
+	if (!zsp->iteratorp)
+		return (NULL);
+
+	return (((zed_strings_node_t *)zsp->iteratorp)->string);
+}
+
+/*
+ * Return the number of strings in container [zsp], or -1 on error.
+ */
+int
+zed_strings_count(zed_strings_t *zsp)
+{
+	if (!zsp) {
+		errno = EINVAL;
+		return (-1);
+	}
+	return (avl_numnodes(&zsp->tree));	/* converted to int on return */
+}
diff --git a/cmd/zed/zed_strings.h b/cmd/zed/zed_strings.h
new file mode 100644
index 000000000000..c1ea804bbe5b
--- /dev/null
+++ b/cmd/zed/zed_strings.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license from the top-level
+ * OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each file
+ * and include the License file from the top-level OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + */ + +#ifndef ZED_STRINGS_H +#define ZED_STRINGS_H + +typedef struct zed_strings zed_strings_t; + +zed_strings_t * zed_strings_create(void); + +void zed_strings_destroy(zed_strings_t *zsp); + +int zed_strings_add(zed_strings_t *zsp, const char *s); + +const char * zed_strings_first(zed_strings_t *zsp); + +const char * zed_strings_next(zed_strings_t *zsp); + +int zed_strings_count(zed_strings_t *zsp); + +#endif /* !ZED_STRINGS_H */ diff --git a/configure.ac b/configure.ac index 7787530d34b4..66272fdf1a07 100644 --- a/configure.ac +++ b/configure.ac @@ -108,6 +108,7 @@ AC_CONFIG_FILES([ cmd/vdev_id/Makefile cmd/arcstat/Makefile cmd/dbufstat/Makefile + cmd/zed/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile diff --git a/man/man8/.gitignore b/man/man8/.gitignore new file mode 100644 index 000000000000..be7e9040f57f --- /dev/null +++ b/man/man8/.gitignore @@ -0,0 +1 @@ +/zed.8 diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am index f680822215cc..b89e34dfd4fe 100644 --- a/man/man8/Makefile.am +++ b/man/man8/Makefile.am @@ -8,5 +8,28 @@ dist_man_MANS = \ zpool.8 \ zstreamdump.8 +nodist_man_MANS = \ + zed.8 + +EXTRA_DIST = \ + zed.8.in + +zed.8: $(srcdir)/zed.8.in + +do_subst = $(SED) \ + -e 's|@libexecdir[@]|$(libexecdir)|g' \ + -e 's|@runstatedir[@]|$(runstatedir)|g' \ + -e 's|@sysconfdir[@]|$(sysconfdir)|g' + +$(nodist_man_MANS): Makefile + $(RM) $@ $@.tmp + srcdir=''; \ + test -f ./$@.in || srcdir=$(srcdir)/; \ + $(do_subst) $${srcdir}$@.in >$@.tmp + mv $@.tmp $@ + install-data-local: $(INSTALL) -d -m 0755 "$(DESTDIR)$(mandir)/man8" + +CLEANFILES = \ + 
$(nodist_man_MANS) diff --git a/man/man8/zed.8.in b/man/man8/zed.8.in new file mode 100644 index 000000000000..b853d86e5f91 --- /dev/null +++ b/man/man8/zed.8.in @@ -0,0 +1,265 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license from the top-level +.\" OPENSOLARIS.LICENSE or . +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each file +.\" and include the License file from the top-level OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). +.\" Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. +.\" +.TH ZED 8 "Octember 1, 2013" "ZFS on Linux" "System Administration Commands" + +.SH NAME +zed \- ZFS Event Daemon + +.SH SYNOPSIS +.HP +.B zed +.\" [\fB\-c\fR \fIconfigfile\fR] +[\fB\-d\fR \fIscriptdir\fR] +[\fB\-f\fR] +[\fB\-F\fR] +[\fB\-h\fR] +[\fB\-L\fR] +[\fB\-M\fR] +[\fB\-p\fR \fIpidfile\fR] +[\fB\-s\fR \fIstatefile\fR] +[\fB\-v\fR] +[\fB\-V\fR] +[\fB\-Z\fR] + +.SH DESCRIPTION +.PP +\fBzed\fR (ZFS Event Daemon) monitors events generated by the ZFS kernel +module. When a ZFS event (zevent) is posted, \fBzed\fR will run any scripts +that have been enabled for the corresponding zevent class. + +.SH OPTIONS +.TP +.BI \-h +Display a summary of the command-line options. +.TP +.BI \-L +Display license information. +.TP +.BI \-V +Display version information. +.TP +.BI \-v +Be verbose. 
+.TP +.BI \-f +Force the daemon to run if at all possible, disabling security checks and +throwing caution to the wind. Not recommended for use in production. +.TP +.BI \-F +Run the daemon in the foreground. +.TP +.BI \-M +Lock all current and future pages in the virtual memory address space. +This may help the daemon remain responsive when the system is under heavy +memory pressure. +.TP +.BI \-Z +Zero the daemon's state, thereby allowing zevents still within the kernel +to be reprocessed. +.\" .TP +.\" .BI \-c\ configfile +.\" Read the configuration from the specified file. +.TP +.BI \-d\ scriptdir +Read the enabled scripts from the specified directory. +.TP +.BI \-p\ pidfile +Write the daemon's process ID to the specified file. +.TP +.BI \-s\ statefile +Write the daemon's state to the specified file. + +.SH ZEVENTS +.PP +A zevent is comprised of a list of name/value pairs (nvpairs). Each zevent +contains an EID (Event IDentifier) that uniquely identifies it throughout +the lifetime of the loaded ZFS kernel module; this EID is a monotonically +increasing integer that resets to 1 each time the kernel module is loaded. +Each zevent also contains a class string that identifies the type of event. +For brevity, a subclass string is defined that omits the leading components +of the class string. Additional nvpairs exist to provide event details. +.PP +The kernel maintains a list of recent zevents that can be viewed (along with +their associated lists of nvpairs) using the "\fBzpool events \-v\fR" command. + +.SH CONFIGURATION +.PP +The scripts to be invoked in response to zevents are located in the +enabled-scripts directory. These can be symlinked or copied from the +installed-scripts directory; symlinks allow for automatic updates from the +installed scripts, whereas copies preserve local modifications. As a security +measure, scripts must be owned by root. They must have execute permissions +for the user, but they must not have write permissions for group or other. 
+Dotfiles are ignored. +.PP +Scripts are named after the zevent class for which they should be invoked. +In particular, a script will be invoked for a given zevent if either its +class or subclass string is a prefix of its filename (and is followed by +a non-alphabetic character). As a special case, the prefix "all" matches +all zevents. Multiple scripts may be invoked for a given zevent. + +.SH SCRIPTS +.PP +Scripts should be written under the presumption they can be invoked +concurrently, and they should use appropriate locking to access any shared +resources. Common variables used by the scripts can be stored in the default +rc file which is sourced by the scripts; these variables should be prefixed +with "ZED_". +.PP +The zevent nvpairs are passed to the scripts as environment variables. +Each nvpair name is converted to an environment variable in the following +manner: 1) it is prefixed with "ZEVENT_", 2) it is converted to uppercase, +and 3) each non-alphanumeric character is converted to an underscore. +Some additional environment variables have been defined to present certain +nvpair values in a more convenient form. An incomplete list of zevent +environment variables is as follows: +.TP +.B +ZEVENT_EID +The Event IDentifier. +.TP +.B +ZEVENT_CLASS +The zevent class string. +.TP +.B +ZEVENT_SUBCLASS +The zevent subclass string. +.TP +.B +ZEVENT_TIME +The time at which the zevent was posted as +"\fIseconds\fR\ \fInanoseconds\fR" since the Epoch. +.TP +.B +ZEVENT_TIME_SECS +The \fIseconds\fR component of ZEVENT_TIME. +.TP +.B +ZEVENT_TIME_NSECS +The \fInanoseconds\fR component of ZEVENT_TIME. +.TP +.B +ZEVENT_TIME_STRING +An almost-RFC3339-compliant string for ZEVENT_TIME. +.PP +Additionally, the following ZED & ZFS variables are defined: +.TP +.B +ZED_PID +The daemon's process ID. +.TP +.B +ZED_SCRIPT_DIR +The daemon's current enabled-scripts directory. +.TP +.B +ZFS_ALIAS +The ZFS alias (name-ver-rel) string used to build the daemon. 
+.TP +.B +ZFS_VERSION +The ZFS version used to build the daemon. +.TP +.B +ZFS_RELEASE +The ZFS release used to build the daemon. +.PP +Scripts may need to call other ZFS commands. The installation paths of +the following executables are defined: \fBZDB\fR, \fBZED\fR, \fBZFS\fR, +\fBZINJECT\fR, and \fBZPOOL\fR. These variables can be overridden in the +zed.rc if needed. + +.SH FILES +.\" .TP +.\" @sysconfdir@/zfs/zed.conf +.\" The default configuration file for the daemon. +.TP +.I @sysconfdir@/zfs/zed.d +The default directory for enabled scripts. +.TP +.I @sysconfdir@/zfs/zed.d/zed.rc +The default rc file for common variables used by the scripts. +.TP +.I @libexecdir@/zfs/zed.d +The default directory for installed scripts. +.TP +.I @runstatedir@/zed.pid +The default file containing the daemon's process ID. +.TP +.I @runstatedir@/zed.state +The default file containing the daemon's state. + +.SH SIGNALS +.TP +.B HUP +Reconfigure the daemon and rescan the directory for enabled scripts. +.TP +.B TERM +Terminate the daemon. + +.SH NOTES +.PP +\fBzed\fR requires root privileges. +.\" Do not taunt zed. + +.SH BUGS +.PP +Events are processed synchronously by a single thread. This can delay the +processing of simultaneous zevents. +.PP +There is no maximum timeout for script execution. Consequently, a misbehaving +script can delay the processing of subsequent zevents. +.PP +The ownership and permissions of the enabled-scripts directory (along +with all parent directories) are not checked. If any of these directories +are improperly owned or permissioned, an unprivileged user could insert a +script to be executed as root. The requirement that scripts be owned by +root mitigates this to some extent. +.PP +Scripts are unable to return state/status information to the kernel. +.PP +Some zevent nvpair types are not handled. These are denoted by zevent +environment variables having a "_NOT_IMPLEMENTED_" value. +.PP +Internationalization support via gettext has not been added. 
+.PP +The configuration file is not yet implemented. +.PP +The diagnosis engine is not yet implemented. + +.SH COPYRIGHT +.PP +Developed at Lawrence Livermore National Laboratory (LLNL\-CODE\-403049). +.br +Copyright (C) 2013\-2014 Lawrence Livermore National Security, LLC. + +.SH LICENSE +.PP +\fBzed\fR (ZFS Event Daemon) is distributed under the terms of the +Common Development and Distribution License (CDDL\-1.0). + +.SH SEE ALSO +.BR zfs (8), +.BR zpool (8) diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 53b73df7c2fc..5c2196f7b518 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -167,6 +167,7 @@ exit 0 %{_sbindir}/* %{_bindir}/* %{_libdir}/*.so.* +%{_libexecdir}/%{name} %{_mandir}/man1/* %{_mandir}/man5/* %{_mandir}/man8/* diff --git a/scripts/common.sh.in b/scripts/common.sh.in index 3f63fc053422..ae1c5cf09e49 100644 --- a/scripts/common.sh.in +++ b/scripts/common.sh.in @@ -38,6 +38,7 @@ sbindir=@sbindir@ udevdir=@udevdir@ udevruledir=@udevruledir@ sysconfdir=@sysconfdir@ +localstatedir=@localstatedir@ ETCDIR=${ETCDIR:-/etc} DEVDIR=${DEVDIR:-/dev/disk/by-vdev} @@ -72,6 +73,8 @@ SYSCTL=${SYSCTL:-/sbin/sysctl} UDEVADM=${UDEVADM:-/sbin/udevadm} AWK=${AWK:-/usr/bin/awk} +ZED_PIDFILE=${ZED_PIDFILE:-${localstatedir}/run/zed.pid} + COLOR_BLACK="\033[0;30m" COLOR_DK_GRAY="\033[1;30m" COLOR_BLUE="\033[0;34m" @@ -745,3 +748,9 @@ stack_check() { fi fi } + +kill_zed() { + if [ -f $ZED_PIDFILE ]; then + kill $(cat $ZED_PIDFILE) + fi +} diff --git a/scripts/zfs.sh b/scripts/zfs.sh index f44053e888ea..b21d2e7cf596 100755 --- a/scripts/zfs.sh +++ b/scripts/zfs.sh @@ -65,6 +65,7 @@ if [ $(id -u) != 0 ]; then fi if [ ${UNLOAD} ]; then + kill_zed umount -t zfs -a stack_check unload_modules diff --git a/zfs-script-config.sh.in b/zfs-script-config.sh.in index ba676c860791..10d24f027f2f 100644 --- a/zfs-script-config.sh.in +++ b/zfs-script-config.sh.in @@ -36,6 +36,8 @@ ZPIOS_SURVEY_SH=${SCRIPTDIR}/zpios-survey.sh INTREE=1 
LDMOD=/sbin/insmod +ZED_PIDFILE=@runstatedir@/zed.pid + KERNEL_MODULES=( \ ${KERNELMOD}/lib/zlib_deflate/zlib_deflate.ko \ ${KERNELMOD}/lib/zlib_inflate/zlib_inflate.ko \ From 11a7043324b3df606b7d7e8f214cbe2eba076446 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Tue, 11 Mar 2014 12:33:32 -0700 Subject: [PATCH 07/11] Add systemd unit file for zed This commit adds a systemd unit file for zed.service and integrates it into the zfs.target from commit 881f45c. Signed-off-by: Chris Dunlap Signed-off-by: Brian Behlendorf Issue #2108 Issue #2 --- etc/systemd/system/Makefile.am | 8 ++++++-- etc/systemd/system/zed.service.in | 13 +++++++++++++ etc/systemd/system/zfs.target.in | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 etc/systemd/system/zed.service.in diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index 31521ceccb58..b7a8db2435c3 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -1,15 +1,17 @@ systemdpreset_DATA = \ $(top_srcdir)/etc/systemd/system/50-zfs.preset systemdunit_DATA = \ - $(top_srcdir)/etc/systemd/system/zfs-import-scan.service \ + $(top_srcdir)/etc/systemd/system/zed.service \ $(top_srcdir)/etc/systemd/system/zfs-import-cache.service \ + $(top_srcdir)/etc/systemd/system/zfs-import-scan.service \ $(top_srcdir)/etc/systemd/system/zfs-mount.service \ $(top_srcdir)/etc/systemd/system/zfs-share.service \ $(top_srcdir)/etc/systemd/system/zfs.target EXTRA_DIST = \ - $(top_srcdir)/etc/systemd/system/zfs-import-scan.service.in \ + $(top_srcdir)/etc/systemd/system/zed.service.in \ $(top_srcdir)/etc/systemd/system/zfs-import-cache.service.in \ + $(top_srcdir)/etc/systemd/system/zfs-import-scan.service.in \ $(top_srcdir)/etc/systemd/system/zfs-mount.service.in \ $(top_srcdir)/etc/systemd/system/zfs-share.service.in \ $(top_srcdir)/etc/systemd/system/zfs.target.in \ @@ -17,12 +19,14 @@ EXTRA_DIST = \ $(systemdunit_DATA): -$(SED) -e 's,@bindir\@,$(bindir),g' \ + -e 
's,@runstatedir\@,$(runstatedir),g' \ -e 's,@sbindir\@,$(sbindir),g' \ -e 's,@sysconfdir\@,$(sysconfdir),g' \ '$@.in' >'$@' $(systemdpreset_DATA): -$(SED) -e 's,@bindir\@,$(bindir),g' \ + -e 's,@runstatedir\@,$(runstatedir),g' \ -e 's,@sbindir\@,$(sbindir),g' \ -e 's,@sysconfdir\@,$(sysconfdir),g' \ '$@.in' >'$@' diff --git a/etc/systemd/system/zed.service.in b/etc/systemd/system/zed.service.in new file mode 100644 index 000000000000..78988abb92b2 --- /dev/null +++ b/etc/systemd/system/zed.service.in @@ -0,0 +1,13 @@ +[Unit] +Description=ZFS Event Daemon (zed) +Documentation=man:zed(8) +After=zfs-import-cache.service +After=zfs-import-scan.service + +[Service] +Type=forking +ExecStart=@sbindir@/zed +PIDFile=@runstatedir@/zed.pid +User=root +Group=root +Restart=on-abort diff --git a/etc/systemd/system/zfs.target.in b/etc/systemd/system/zfs.target.in index 7d464873b7cc..35415338560a 100644 --- a/etc/systemd/system/zfs.target.in +++ b/etc/systemd/system/zfs.target.in @@ -2,6 +2,7 @@ Description=ZFS startup target Requires=zfs-mount.service Requires=zfs-share.service +Wants=zed.service [Install] WantedBy=multi-user.target From 1a5c611a2281b792634672a267b9c9cd7b60ef12 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 24 Jan 2014 15:27:59 -0800 Subject: [PATCH 08/11] Make command line guid parsing more tolerant Several of the zfs utilities allow you to pass a vdev's guid rather than the device name. However, the utilities are not consistent in how they parse that guid. For example, 'zinject' expects the guid to be passed as a hex value while 'zpool replace' wants it as a decimal. The user is forced to just know what format to use. This patch improve things by making the parsing more tolerant. When strtol(3) is called using 0 for the base, rather than say 10 or 16, it will then accept hex, decimal, or octal input based on the prefix. From the man page. 
If base is zero or 16, the string may then include a "0x" prefix, and the number will be read in base 16; otherwise, a zero base is taken as 10 (decimal) unless the next character is '0', in which case it is taken as 8 (octal). NOTE: There may be additional conversions not caught be this patch. Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Issue #2 --- cmd/zinject/translate.c | 2 +- lib/libzfs/libzfs_pool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/zinject/translate.c b/cmd/zinject/translate.c index b2ccb673a193..5cc9d9fdc707 100644 --- a/cmd/zinject/translate.c +++ b/cmd/zinject/translate.c @@ -467,7 +467,7 @@ translate_device(const char *pool, const char *device, err_type_t label_type, if ((zhp = zpool_open(g_zfs, pool)) == NULL) return (-1); - record->zi_guid = strtoull(device, &end, 16); + record->zi_guid = strtoull(device, &end, 0); if (record->zi_guid == 0 || *end != '\0') { tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index db1f0d7cf2be..b1ddd983d895 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2167,7 +2167,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); - guid = strtoull(path, &end, 10); + guid = strtoull(path, &end, 0); if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); } else if (zpool_vdev_is_interior(path)) { From d21705eab952e5aa1c0bcf920b76e8428384d80b Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 24 Jan 2014 15:43:07 -0800 Subject: [PATCH 09/11] Add missing DATA_TYPE_STRING_ARRAY output This functionality has always been missing. But until now there were no zevents which included an array of strings so it wasn't missed. 
However, that's now changed so to ensure this information is output correctly by 'zpool events -v' the DATA_TYPE_STRING_ARRAY has been implemented. Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Issue #2 --- cmd/zpool/zpool_main.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index cc13e3739394..e38213c8520e 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -5439,7 +5439,18 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth) break; } - case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_STRING_ARRAY: { + char **str; + uint_t i, nelem; + + (void) nvpair_value_string_array(nvp, &str, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("\"%s\" "), + str[i] ? str[i] : ""); + + break; + } + case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_DOUBLE: From 904ea2763e6576f6971be4a684e6765aaea5221c Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 24 Jan 2014 15:47:46 -0800 Subject: [PATCH 10/11] Add automatic hot spare functionality When a vdev starts getting I/O or checksum errors it is now possible to automatically rebuild to a hot spare device. To cleanly support this functionality in a shell script some additional information was added to all zevent ereports which include a vdev. This covers both io and checksum zevents but may be used by other scripts. In the Illumos FMA solution the same information is required but it is retrieved through the libzfs library interface. Specifically the following members were added: vdev_spare_paths - List of vdev paths for all hot spares. vdev_spare_guids - List of vdev guids for all hot spares. vdev_read_errors - Read errors for the problematic vdev vdev_write_errors - Write errors for the problematic vdev vdev_cksum_errors - Checksum errors for the problematic vdev. By default the required hot spare scripts are installed but this functionality is disabled. 
To enable hot sparing uncomment the ZED_SPARE_ON_IO_ERRORS and ZED_SPARE_ON_CHECKSUM_ERRORS in the /etc/zfs/zed.d/zed.rc configuration file. These scripts do not add support for the autoexpand property. At a minimum this requires adding a new udev rule to detect when a new device is added to the system. It also requires that the autoexpand policy be ported from Illumos, see: https://github.com/illumos/illumos-gate/blob/master/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c Support for detecting the correct name of a vdev when it's not a whole disk was added by Turbo Fredriksson. Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Signed-off-by: Turbo Fredriksson Issue #2 --- cmd/zed/Makefile.am | 4 + cmd/zed/zed.d/checksum-spare.sh | 1 + cmd/zed/zed.d/io-spare.sh | 125 ++++++++++++++++++++++++++++++++ cmd/zed/zed.d/zed.rc | 6 ++ include/sys/fm/fs/zfs.h | 5 ++ module/zfs/spa.c | 2 +- module/zfs/zfs_fm.c | 56 ++++++++++++-- 7 files changed, 190 insertions(+), 9 deletions(-) create mode 120000 cmd/zed/zed.d/checksum-spare.sh create mode 100755 cmd/zed/zed.d/io-spare.sh diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 8e4efe919502..f1404dea1363 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -39,17 +39,21 @@ dist_zedexec_SCRIPTS = \ $(top_srcdir)/cmd/zed/zed.d/all-debug.sh \ $(top_srcdir)/cmd/zed/zed.d/all-syslog.sh \ $(top_srcdir)/cmd/zed/zed.d/checksum-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/checksum-spare.sh \ $(top_srcdir)/cmd/zed/zed.d/data-email.sh \ $(top_srcdir)/cmd/zed/zed.d/generic-email.sh \ $(top_srcdir)/cmd/zed/zed.d/io-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/io-spare.sh \ $(top_srcdir)/cmd/zed/zed.d/resilver.finish-email.sh \ $(top_srcdir)/cmd/zed/zed.d/scrub.finish-email.sh zedconfdefaults = \ all-syslog.sh \ checksum-email.sh \ + checksum-spare.sh \ data-email.sh \ io-email.sh \ + io-spare.sh \ resilver.finish-email.sh \ scrub.finish-email.sh diff --git a/cmd/zed/zed.d/checksum-spare.sh 
b/cmd/zed/zed.d/checksum-spare.sh new file mode 120000 index 000000000000..f564f932283c --- /dev/null +++ b/cmd/zed/zed.d/checksum-spare.sh @@ -0,0 +1 @@ +io-spare.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/io-spare.sh b/cmd/zed/zed.d/io-spare.sh new file mode 100755 index 000000000000..dd5bf4e0f226 --- /dev/null +++ b/cmd/zed/zed.d/io-spare.sh @@ -0,0 +1,125 @@ +#!/bin/sh +# +# Replace a device with a hot spare in response to IO or checksum errors. +# The following actions will be performed automatically when the number +# of errors exceeds the limit set by ZED_SPARE_ON_IO_ERRORS or +# ZED_SPARE_ON_CHECKSUM_ERRORS. +# +# 1) FAULT the device on IO errors, no further IO will be attempted. +# DEGRADE the device on checksum errors, the device is still +# functional and can be used to service IO requests. +# 2) Set the SES fault beacon for the device. +# 3) Replace the device with a hot spare if any are available. +# +# Once the hot sparing operation is complete either the failed device or +# the hot spare must be manually retired using the 'zpool detach' command. +# The 'autoreplace' functionality which would normally take care of this +# under Illumos has not yet been implemented. +# +# Full support for autoreplace is planned, but it requires that the full +# ZFS Diagnosis Engine be ported. In the meanwhile this script provides +# the majority of the expected hot spare functionality. +# +# Exit codes: +# 0: replaced by hot spare +# 1: no hot spare device available +# 2: hot sparing disabled +# 3: already faulted or degraded +# 4: unsupported event class +# 5: internal error +# +test -f "${ZED_SCRIPT_DIR}/zed.rc" && . "${ZED_SCRIPT_DIR}/zed.rc" + +test -n "${ZEVENT_POOL}" || exit 5 +test -n "${ZEVENT_SUBCLASS}" || exit 5 +test -n "${ZEVENT_VDEV_PATH}" || exit 5 +test -n "${ZEVENT_VDEV_GUID}" || exit 5 + +# Defaults to disabled, enable in the zed.rc file.
+ZED_SPARE_ON_IO_ERRORS=${ZED_SPARE_ON_IO_ERRORS:-0} +ZED_SPARE_ON_CHECKSUM_ERRORS=${ZED_SPARE_ON_CHECKSUM_ERRORS:-0} + +if [ ${ZED_SPARE_ON_IO_ERRORS} -eq 0 -a \ + ${ZED_SPARE_ON_CHECKSUM_ERRORS} -eq 0 ]; then + exit 2 +fi + +# A lock file is used to serialize execution. +ZED_LOCKDIR=${ZED_LOCKDIR:-/var/lock} +LOCKFILE="${ZED_LOCKDIR}/zed.spare.lock" + +exec 8> "${LOCKFILE}" +flock -x 8 + +# Given a pool and a vdev return the status, (ONLINE, FAULTED, etc...). +vdev_status() { + local POOL=$1 + local VDEV=`basename $2` + + ${ZPOOL} status ${POOL} | \ + awk -v pat="${VDEV}|${VDEV/-part?}" '$0 ~ pat { print $1" "$2 }' + return 0 +} + +# Fault devices after N I/O errors. +if [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.io" ]; then + ERRORS=`expr ${ZEVENT_VDEV_READ_ERRORS} + ${ZEVENT_VDEV_WRITE_ERRORS}` + + if [ ${ZED_SPARE_ON_IO_ERRORS} -gt 0 -a \ + ${ERRORS} -ge ${ZED_SPARE_ON_IO_ERRORS} ]; then + ACTION="fault" + fi +# Degrade devices after N checksum errors. +elif [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.checksum" ]; then + ERRORS=${ZEVENT_VDEV_CKSUM_ERRORS} + + if [ ${ZED_SPARE_ON_CHECKSUM_ERRORS} -gt 0 -a \ + ${ERRORS} -ge ${ZED_SPARE_ON_CHECKSUM_ERRORS} ]; then + ACTION="degrade" + fi +else + ACTION= +fi + +if [ -n "${ACTION}" ]; then + + # Device is already FAULTED or DEGRADED + set -- `vdev_status ${ZEVENT_POOL} ${ZEVENT_VDEV_PATH}` + ZEVENT_VDEV_PATH_FOUND=$1 + STATUS=$2 + if [ "${STATUS}" = "FAULTED" -o "${STATUS}" = "DEGRADED" ]; then + exit 3 + fi + + # Step 1) FAULT or DEGRADE the device + # + ${ZINJECT} -d ${ZEVENT_VDEV_GUID} -A ${ACTION} ${ZEVENT_POOL} + + # Step 2) Set the SES fault beacon. + # + # XXX: Set the 'fault' or 'ident' beacon for the device. This can + # be done through the sg_ses utility, the only hard part is to map + # the sd device to its corresponding enclosure and slot. We may + # be able to leverage the existing vdev_id scripts for this.
+ # + # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3 + # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3 + + # Step 3) Replace the device with a hot spare. + # + # Round robin through the spares selecting those which are available. + # + for SPARE in ${ZEVENT_VDEV_SPARE_PATHS}; do + set -- `vdev_status ${ZEVENT_POOL} ${SPARE}` + SPARE_VDEV_FOUND=$1 + STATUS=$2 + if [ "${STATUS}" = "AVAIL" ]; then + ${ZPOOL} replace ${ZEVENT_POOL} \ + ${ZEVENT_VDEV_GUID} ${SPARE_VDEV_FOUND} && exit 0 + fi + done + + exit 1 +fi + +exit 4 diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 57c969c89900..69989f95315b 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -26,3 +26,9 @@ # The syslog tag for marking zed events. #ZED_SYSLOG_TAG="zed" + +# Replace a device with a hot spare after N I/O errors are detected. +#ZED_SPARE_ON_IO_ERRORS=1 + +# Replace a device with a hot spare after N checksum errors are detected. +#ZED_SPARE_ON_CHECKSUM_ERRORS=10 diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index d9122ac5f7d3..d541b07a3729 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -75,6 +75,11 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT "vdev_ashift" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS "vdev_complete_ts" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS "vdev_delta_ts" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS "vdev_spare_paths" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS "vdev_spare_guids" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS "vdev_read_errors" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS "vdev_write_errors" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS "vdev_cksum_errors" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path" diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9e7a7b785a32..af93b7ce5e11 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ 
-1377,7 +1377,7 @@ spa_load_spares(spa_t *spa) * validate each vdev on the spare list. If the vdev also exists in the * active configuration, then we also mark this vdev as an active spare. */ - spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), + spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), KM_PUSHPAGE); for (i = 0; i < spa->spa_spares.sav_count; i++) { VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index df47d99cfafa..05ee84c19e4d 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -251,6 +251,11 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, if (vd != NULL) { vdev_t *pvd = vd->vdev_parent; vdev_queue_t *vq = &vd->vdev_queue; + vdev_stat_t *vs = &vd->vdev_stat; + vdev_t *spare_vd; + uint64_t *spare_guids; + char **spare_paths; + int i, spare_count; fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, DATA_TYPE_UINT64, vd->vdev_guid, @@ -282,6 +287,16 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, vq->vq_io_delta_ts, NULL); } + if (vs != NULL) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS, + DATA_TYPE_UINT64, vs->vs_read_errors, + FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS, + DATA_TYPE_UINT64, vs->vs_write_errors, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS, + DATA_TYPE_UINT64, vs->vs_checksum_errors, NULL); + } + if (pvd != NULL) { fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, @@ -298,6 +313,28 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, DATA_TYPE_STRING, pvd->vdev_devid, NULL); } + + spare_count = spa->spa_spares.sav_count; + spare_paths = kmem_zalloc(sizeof (char *) * spare_count, + KM_PUSHPAGE); + spare_guids = kmem_zalloc(sizeof (uint64_t) * spare_count, + KM_PUSHPAGE); + + for (i = 0; i < spare_count; i++) { + spare_vd = spa->spa_spares.sav_vdevs[i]; + if (spare_vd) { + spare_paths[i] = 
spare_vd->vdev_path; + spare_guids[i] = spare_vd->vdev_guid; + } + } + + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS, + DATA_TYPE_STRING_ARRAY, spare_count, spare_paths, + FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS, + DATA_TYPE_UINT64_ARRAY, spare_count, spare_guids, NULL); + + kmem_free(spare_guids, sizeof (uint64_t) * spare_count); + kmem_free(spare_paths, sizeof (char *) * spare_count); } if (zio != NULL) { @@ -834,15 +871,18 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, ZFS_ERROR_CLASS, name); - VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); - VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); + VERIFY0(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION)); + VERIFY0(nvlist_add_string(resource, FM_CLASS, class)); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa))); + VERIFY0(nvlist_add_int32(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, spa_load_state(spa))); + if (vd) { - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid)); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state)); } zfs_zevent_post(resource, NULL, zfs_zevent_post_cb); From 518eba14928ddf2c1871d33d4b0cdff7ec45bc23 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Tue, 1 Apr 2014 14:21:56 -0700 Subject: [PATCH 11/11] Replace check for _POSIX_MEMLOCK w/ HAVE_MLOCKALL zed supports a '-M' cmdline opt to lock all pages in memory via mlockall(). The _POSIX_MEMLOCK define is checked to determine whether this function is supported. 
The current test assumes mlockall() is supported if _POSIX_MEMLOCK is non-zero. However, this test is insufficient according to mlock(2) and sysconf(3). If _POSIX_MEMLOCK is -1, mlockall() is not supported; but if _POSIX_MEMLOCK is 0, availability must be checked at runtime. This commit adds an autoconf check for mlockall() to user.m4. The zed code block for mlockall() is now guarded with a test for HAVE_MLOCKALL. If defined, mlockall() will be called and its runtime availability checked via its return value. Signed-off-by: Chris Dunlap Signed-off-by: Brian Behlendorf Issue #2 --- cmd/zed/zed.c | 9 ++++----- config/user.m4 | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cmd/zed/zed.c b/cmd/zed/zed.c index d2fc0e899717..c54a59b0a7b7 100644 --- a/cmd/zed/zed.c +++ b/cmd/zed/zed.c @@ -97,10 +97,7 @@ _setup_sig_handlers(void) static void _lock_memory(void) { -#if ! _POSIX_MEMLOCK - zed_log_die("Failed to lock memory pages: mlockall() not supported"); - -#else /* _POSIX_MEMLOCK */ +#if HAVE_MLOCKALL int i = 0; const int max_tries = 10; @@ -114,7 +111,9 @@ _lock_memory(void) } zed_log_die("Failed to lock memory pages: %s", strerror(errno)); -#endif /* _POSIX_MEMLOCK */ +#else /* HAVE_MLOCKALL */ + zed_log_die("Failed to lock memory pages: mlockall() not supported"); +#endif /* HAVE_MLOCKALL */ } /* diff --git a/config/user.m4 b/config/user.m4 index 6c127820c94d..38024370bbb4 100644 --- a/config/user.m4 +++ b/config/user.m4 @@ -13,4 +13,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [ ZFS_AC_CONFIG_USER_LIBBLKID ZFS_AC_CONFIG_USER_FRAME_LARGER_THAN ZFS_AC_CONFIG_USER_RUNSTATEDIR +dnl # +dnl # Checks for library functions + AC_CHECK_FUNCS([mlockall]) ])