From 011b6a6dc77a7a2874d684b29c5b1076a96485fe Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Sat, 25 Apr 2020 09:38:14 -0400 Subject: [PATCH] udevd: add short-duration inotify watch during udev block device event processing While external programs that take an exclusive flock on block devices while modifying them (e.g. partitioning or mkfs) will safely work with udevd so that the block device modifications don't race with udevd processing of the device (e.g. creating symlinks for the newly-created partitions), any external program that doesn't take an exclusive flock will race with udevd, and the changes made to the block device may be missed by udevd, leading to failures, e.g. udevd might not create the symlinks for new partitions, or might not create the /dev/disk/by-* for new filesystems. This updates the flock function to also take a short-duration inotify watch, so that after processing the device, udevd can synthesize a new uevent if it detected any IN_CLOSE_WRITE while the device was being processed, before the real watch was added. One example is the mkswap that we ourselves actually run, from the service created by cryptsetup-generator; we have it running mkswap: if (swap) fprintf(f, "ExecStartPost=/sbin/mkswap '/dev/mapper/%s'\n", name_escaped); However, this is racy, because it doesn't take an exclusive flock. This (and probably other places in our own code) should have done "flock ..." instead. If it's hard for us to get this right, it seems too much to expect all other non-systemd programs to also be aware they need to flock the block device. Fixes: #10179 --- src/udev/udevd.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/udev/udevd.c b/src/udev/udevd.c index cfda47f849b25..628ffaf64c11b 100644 --- a/src/udev/udevd.c +++ b/src/udev/udevd.c @@ -125,6 +125,7 @@ struct event { }; static void event_queue_cleanup(Manager *manager, enum event_state type); +static int synthesize_change(sd_device *dev); enum worker_state { WORKER_UNDEF, @@ -319,13 +320,15 @@ static int worker_send_message(int fd) { return loop_write(fd, &message, sizeof(message), false); } -static int worker_lock_block_device(sd_device *dev, int *ret_fd) { +static int worker_lock_and_watch_block_device(sd_device *dev, int *ret_fd, int *watch_fd) { _cleanup_close_ int fd = -1; + _cleanup_close_ int ifd = -1; const char *val; int r; assert(dev); assert(ret_fd); + assert(watch_fd); /* * Take a shared lock on the device node; this establishes @@ -377,13 +380,22 @@ static int worker_lock_block_device(sd_device *dev, int *ret_fd) { if (flock(fd, LOCK_SH|LOCK_NB) < 0) return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val); + ifd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (ifd < 0) + return log_device_debug_errno(dev, errno, "Failed to inotify_init: %m"); + + if (inotify_add_watch(ifd, val, IN_CLOSE_WRITE) < 0) + return log_device_debug_errno(dev, errno, "Failed to inotify_add_watch(%s): %m", val); + *ret_fd = TAKE_FD(fd); + *watch_fd = TAKE_FD(ifd); return 1; } static int worker_process_device(Manager *manager, sd_device *dev) { _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL; _cleanup_close_ int fd_lock = -1; + _cleanup_close_ int fd_watch = -1; DeviceAction action; uint64_t seqnum; int r; @@ -406,7 +418,18 @@ static int worker_process_device(Manager *manager, sd_device *dev) { if (!udev_event) return -ENOMEM; - r = worker_lock_block_device(dev, &fd_lock); + /* We take a shared flock on any block devices, so any external program that modifies the block + * device race with us while we're processing it, below, if the external program takes an + * exclusive flock on the block device. However, any external program that modifies the block + * device *without* taking an exclusive flock can still race with us, and we may miss an update + * of the block device (leading to missing /dev/disk symlinks, etc). So we also set a inotify + * watch that we check after processing. + * + * Note that we can't use the real watch this early, because 1) udev_event_execute_rules removes + * the watch and 2) we don't know if we should start the real watch yet, as ->inotify_watch is + * set during udev_event_execute_rules. + */ + r = worker_lock_and_watch_block_device(dev, &fd_lock, &fd_watch); if (r < 0) return r; @@ -432,6 +455,30 @@ static int worker_process_device(Manager *manager, sd_device *dev) { log_device_debug(dev, "Device (SEQNUM=%"PRIu64", ACTION=%s) processed", seqnum, device_action_to_string(action)); + /* We've processed the block device now, and we started the real watch, so we just + * need to check if there was any IN_CLOSE_WRITE that we missed while processing, + * before the real watch was started. + */ + if (fd_watch >=0) { + union inotify_event_buffer buffer; + struct inotify_event *e; + bool change = false; + + while (!change) { + r = read(fd_watch, &buffer, sizeof(buffer)); + if (r < 0) + break; + FOREACH_INOTIFY_EVENT(e, buffer, r) + if (e->mask & IN_CLOSE_WRITE) { + change = true; + break; + } + } + + if (change) + synthesize_change(dev); + } + return 0; }