diff --git a/.clang-tidy b/.clang-tidy index 091c20ff2..385209b91 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -123,4 +123,4 @@ CheckOptions: value: true # Allow specific masks - key: readability-magic-numbers.IgnoredIntegerValues - value: 255;65535;100 + value: 3;255;65535;100 diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index b5a72046a..acb03aef1 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -88,6 +88,8 @@ set(doc_srcs ${CMAKE_CURRENT_SOURCE_DIR}/developers/style.rst ${CMAKE_CURRENT_SOURCE_DIR}/developers/tests.rst ${CMAKE_CURRENT_SOURCE_DIR}/developers/modules/index.rst + ${CMAKE_CURRENT_SOURCE_DIR}/developers/modules/core/index.rst + ${CMAKE_CURRENT_SOURCE_DIR}/developers/modules/core/locking.rst ${CMAKE_CURRENT_SOURCE_DIR}/developers/modules/libbpfilter/libbpfilter.rst ${CMAKE_CURRENT_SOURCE_DIR}/external/benchmarks/index.rst ${CMAKE_CURRENT_SOURCE_DIR}/external/coverage/index.rst diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 8d5d7b540..3c502af5f 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -3,7 +3,7 @@ PROJECT_NAME = "@PROJECT_NAME@" PROJECT_BRIEF = An eBPF-based packet filtering framework OUTPUT_DIRECTORY = "@CMAKE_CURRENT_BINARY_DIR@" STRIP_FROM_PATH = "@CMAKE_SOURCE_DIR@/src" "@CMAKE_SOURCE_DIR@/tests" -STRIP_FROM_INC_PATH = "@CMAKE_SOURCE_DIR@/src" "@CMAKE_SOURCE_DIR@/tests" +STRIP_FROM_INC_PATH = "@CMAKE_SOURCE_DIR@/src/libbpfilter/include" "@CMAKE_SOURCE_DIR@/tests" OPTIMIZE_OUTPUT_FOR_C = YES AUTOLINK_SUPPORT = YES IDL_PROPERTY_SUPPORT = NO diff --git a/doc/developers/modules/core/index.rst b/doc/developers/modules/core/index.rst new file mode 100644 index 000000000..0841bd007 --- /dev/null +++ b/doc/developers/modules/core/index.rst @@ -0,0 +1,10 @@ +Core +==== + +Core building blocks of the project. + +.. toctree:: + :titlesonly: + :glob: + + * diff --git a/doc/developers/modules/core/locking.rst b/doc/developers/modules/core/locking.rst new file mode 100644 index 000000000..dfe206caf --- /dev/null +++ b/doc/developers/modules/core/locking.rst @@ -0,0 +1,5 @@ +Locking +======= + +.. doxygenfile:: core/lock.h + :sections: briefdescription detaileddescription innerclass public-type public-attrib typedef enum define var func diff --git a/doc/developers/modules/index.rst b/doc/developers/modules/index.rst index 8f14d85d8..c291fb826 100644 --- a/doc/developers/modules/index.rst +++ b/doc/developers/modules/index.rst @@ -6,6 +6,7 @@ Modules :caption: Modules libbpfilter/libbpfilter + core/index ``bpfilter`` is composed of multiple modules depending on each other. Splitting the project in different modules allows for the source code to be efficiently reused: diff --git a/src/libbpfilter/CMakeLists.txt b/src/libbpfilter/CMakeLists.txt index 51cb54c27..13729711a 100644 --- a/src/libbpfilter/CMakeLists.txt +++ b/src/libbpfilter/CMakeLists.txt @@ -49,6 +49,7 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/io.c ${CMAKE_CURRENT_SOURCE_DIR}/core/hashset.c ${CMAKE_CURRENT_SOURCE_DIR}/core/list.c + ${CMAKE_CURRENT_SOURCE_DIR}/core/lock.h ${CMAKE_CURRENT_SOURCE_DIR}/core/lock.c ${CMAKE_CURRENT_SOURCE_DIR}/core/vector.c ${CMAKE_CURRENT_SOURCE_DIR}/logger.c ${CMAKE_CURRENT_SOURCE_DIR}/matcher.c diff --git a/src/libbpfilter/cgen/cgen.c b/src/libbpfilter/cgen/cgen.c index 20aa0c791..5d7af7046 100644 --- a/src/libbpfilter/cgen/cgen.c +++ b/src/libbpfilter/cgen/cgen.c @@ -18,11 +18,9 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -31,29 +29,12 @@ #include "cgen/prog/link.h" #include "cgen/prog/map.h" #include "cgen/program.h" +#include "core/lock.h" #define _BF_PROG_NAME "bf_prog" #define _BF_CTX_PIN_NAME "bf_ctx" #define _BF_CTX_TMP_PIN_NAME "bf_ctx_tmp" -static int _bf_cgen_get_chain_pindir_fd(const char *name) -{ - _cleanup_close_ int bf_fd = -1; - _cleanup_close_ int chain_fd = -1; - - assert(name); - - bf_fd = bf_ctx_get_pindir_fd(); - if (bf_fd < 0) - return bf_fd; - - chain_fd = bf_opendir_at(bf_fd, name, true); - if (chain_fd < 0) - return chain_fd; - - return TAKE_FD(chain_fd); -} - /** * @brief Persist the codegen state to a BPF context map in bpffs. * @@ -116,14 +97,15 @@ static int _bf_cgen_persist(const struct bf_cgen *cgen, int dir_fd) return 0; } -static int _bf_cgen_new_from_pack(struct bf_cgen **cgen, bf_rpack_node_t node) +static int _bf_cgen_new_from_pack(struct bf_cgen **cgen, struct bf_lock *lock, + bf_rpack_node_t node) { _free_bf_cgen_ struct bf_cgen *_cgen = NULL; - _cleanup_close_ int dir_fd = -1; bf_rpack_node_t child; int r; assert(cgen); + assert(lock); _cgen = calloc(1, sizeof(*_cgen)); if (!_cgen) @@ -141,13 +123,7 @@ static int _bf_cgen_new_from_pack(struct bf_cgen **cgen, bf_rpack_node_t node) if (r) return bf_rpack_key_err(r, "bf_cgen.handle"); - dir_fd = _bf_cgen_get_chain_pindir_fd(_cgen->chain->name); - if (dir_fd < 0) { - return bf_err_r(dir_fd, "failed to open chain pin directory for '%s'", - _cgen->chain->name); - } - - r = bf_handle_new_from_pack(&_cgen->handle, dir_fd, child); + r = bf_handle_new_from_pack(&_cgen->handle, lock, child); if (r) return r; @@ -156,7 +132,7 @@ static int _bf_cgen_new_from_pack(struct bf_cgen **cgen, bf_rpack_node_t node) return 0; } -int bf_cgen_new_from_dir_fd(struct bf_cgen **cgen, int dir_fd) +int bf_cgen_new_from_dir_fd(struct bf_cgen **cgen, struct bf_lock *lock) { _free_bf_rpack_ bf_rpack_t *pack = NULL; _cleanup_close_ int map_fd = -1; @@ -166,9 +142,9 @@ int bf_cgen_new_from_dir_fd(struct bf_cgen **cgen, int dir_fd) int r; assert(cgen); - assert(dir_fd >= 0); + assert(lock); - r = bf_bpf_obj_get(_BF_CTX_PIN_NAME, dir_fd, &map_fd); + r = bf_bpf_obj_get(_BF_CTX_PIN_NAME, lock->chain_fd, &map_fd); if (r < 0) return bf_err_r(r, "failed to open pinned context map"); @@ -191,7 +167,7 @@ int bf_cgen_new_from_dir_fd(struct bf_cgen **cgen, int dir_fd) if (r) return bf_err_r(r, "failed to create rpack for bf_cgen"); - r = _bf_cgen_new_from_pack(cgen, bf_rpack_root(pack)); + r = _bf_cgen_new_from_pack(cgen, lock, bf_rpack_root(pack)); if (r) return bf_err_r(r, "failed to deserialize cgen from context map"); @@ -223,21 +199,11 @@ int bf_cgen_new(struct bf_cgen **cgen, struct bf_chain **chain) void bf_cgen_free(struct bf_cgen **cgen) { - _cleanup_close_ int pin_fd = -1; - assert(cgen); if (!*cgen) return; - /* Perform a non-recursive removal of the chain's pin directory: if - * the chain hasn't been pinned (e.g. due to a failure), the pin directory - * will be empty and will be removed. If the chain is valid and pinned, then - * the removal of the pin directory will fail, but that's alright. */ - pin_fd = bf_ctx_get_pindir_fd(); - if (pin_fd >= 0) - bf_rmdir_at(pin_fd, (*cgen)->chain->name, false); - bf_handle_free(&(*cgen)->handle); bf_chain_free(&(*cgen)->chain); @@ -305,17 +271,14 @@ int bf_cgen_get_counter(const struct bf_cgen *cgen, return bf_handle_get_counter(cgen->handle, counter_idx, counter); } -int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts) +int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts, + struct bf_lock *lock) { _free_bf_program_ struct bf_program *prog = NULL; - _cleanup_close_ int pindir_fd = -1; int r; assert(cgen); - - pindir_fd = _bf_cgen_get_chain_pindir_fd(cgen->chain->name); - if (pindir_fd < 0) - return pindir_fd; + assert(lock); r = bf_program_new(&prog, cgen->chain, cgen->handle); if (r < 0) @@ -335,13 +298,13 @@ int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts) return bf_err_r(r, "failed to load and attach the chain"); } - r = bf_handle_pin(cgen->handle, pindir_fd); + r = bf_handle_pin(cgen->handle, lock); if (r) return r; - r = _bf_cgen_persist(cgen, pindir_fd); + r = _bf_cgen_persist(cgen, lock->chain_fd); if (r) { - bf_handle_unpin(cgen->handle, pindir_fd); + bf_handle_unpin(cgen->handle, lock); return bf_err_r(r, "failed to persist cgen for '%s'", cgen->chain->name); } @@ -349,17 +312,13 @@ int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts) return 0; } -int bf_cgen_load(struct bf_cgen *cgen) +int bf_cgen_load(struct bf_cgen *cgen, struct bf_lock *lock) { _free_bf_program_ struct bf_program *prog = NULL; - _cleanup_close_ int pindir_fd = -1; int r; assert(cgen); - - pindir_fd = _bf_cgen_get_chain_pindir_fd(cgen->chain->name); - if (pindir_fd < 0) - return pindir_fd; + assert(lock); r = bf_program_new(&prog, cgen->chain, cgen->handle); if (r < 0) @@ -373,13 +332,13 @@ int bf_cgen_load(struct bf_cgen *cgen) if (r < 0) return bf_err_r(r, "failed to load the chain"); - r = bf_handle_pin(cgen->handle, pindir_fd); + r = bf_handle_pin(cgen->handle, lock); if (r) return r; - r = _bf_cgen_persist(cgen, pindir_fd); + r = _bf_cgen_persist(cgen, lock->chain_fd); if (r) { - bf_handle_unpin(cgen->handle, pindir_fd); + bf_handle_unpin(cgen->handle, lock); return bf_err_r(r, "failed to persist cgen for '%s'", cgen->chain->name); } @@ -390,35 +349,32 @@ int bf_cgen_load(struct bf_cgen *cgen) return 0; } -int bf_cgen_attach(struct bf_cgen *cgen, struct bf_hookopts **hookopts) +int bf_cgen_attach(struct bf_cgen *cgen, struct bf_hookopts **hookopts, + struct bf_lock *lock) { - _cleanup_close_ int pindir_fd = -1; int r; assert(cgen); assert(hookopts); + assert(lock); bf_info("attaching %s to %s", cgen->chain->name, bf_hook_to_str(cgen->chain->hook)); bf_hookopts_dump(*hookopts, EMPTY_PREFIX); - pindir_fd = _bf_cgen_get_chain_pindir_fd(cgen->chain->name); - if (pindir_fd < 0) - return pindir_fd; - r = bf_handle_attach(cgen->handle, cgen->chain->hook, hookopts); if (r < 0) return bf_err_r(r, "failed to attach chain '%s'", cgen->chain->name); - r = bf_link_pin(cgen->handle->link, pindir_fd); + r = bf_link_pin(cgen->handle->link, lock); if (r) { bf_handle_detach(cgen->handle); return r; } - r = _bf_cgen_persist(cgen, pindir_fd); + r = _bf_cgen_persist(cgen, lock->chain_fd); if (r) { - bf_link_unpin(cgen->handle->link, pindir_fd); + bf_link_unpin(cgen->handle->link, lock); bf_handle_detach(cgen->handle); return bf_err_r(r, "failed to persist cgen for '%s'", cgen->chain->name); @@ -471,26 +427,22 @@ static int _bf_cgen_transfer_counters(const struct bf_handle *old_handle, } int bf_cgen_update(struct bf_cgen *cgen, struct bf_chain **new_chain, - uint32_t flags) + uint32_t flags, struct bf_lock *lock) { _free_bf_program_ struct bf_program *new_prog = NULL; _free_bf_handle_ struct bf_handle *new_handle = NULL; - _cleanup_close_ int pindir_fd = -1; struct bf_handle *old_handle; int r; assert(cgen); assert(new_chain); + assert(lock); if (flags & ~BF_FLAGS_MASK(_BF_CGEN_UPDATE_MAX)) return bf_err_r(-EINVAL, "unknown update flags: 0x%x", flags); old_handle = cgen->handle; - pindir_fd = _bf_cgen_get_chain_pindir_fd((*new_chain)->name); - if (pindir_fd < 0) - return pindir_fd; - r = bf_handle_new(&new_handle, _BF_PROG_NAME); if (r) return r; @@ -522,13 +474,13 @@ int bf_cgen_update(struct bf_cgen *cgen, struct bf_chain **new_chain, return bf_err_r(r, "failed to transfer counters"); } - bf_handle_unpin(old_handle, pindir_fd); + bf_handle_unpin(old_handle, lock); if (old_handle->link) { r = bf_link_update(old_handle->link, new_handle->prog_fd); if (r) { bf_err_r(r, "failed to update bf_link object with new program"); - if (bf_handle_pin(old_handle, pindir_fd) < 0) + if (bf_handle_pin(old_handle, lock) < 0) bf_err("failed to repin old handle, ignoring"); return r; } @@ -539,13 +491,13 @@ int bf_cgen_update(struct bf_cgen *cgen, struct bf_chain **new_chain, bf_swap(cgen->handle, new_handle); - r = bf_handle_pin(cgen->handle, pindir_fd); + r = bf_handle_pin(cgen->handle, lock); if (r) return bf_err_r(r, "failed to pin new handle"); - r = _bf_cgen_persist(cgen, pindir_fd); + r = _bf_cgen_persist(cgen, lock->chain_fd); if (r) { - bf_handle_unpin(cgen->handle, pindir_fd); + bf_handle_unpin(cgen->handle, lock); return bf_err_r(r, "failed to persist cgen for '%s'", cgen->chain->name); } @@ -553,9 +505,9 @@ int bf_cgen_update(struct bf_cgen *cgen, struct bf_chain **new_chain, bf_chain_free(&cgen->chain); cgen->chain = TAKE_PTR(*new_chain); - r = _bf_cgen_persist(cgen, pindir_fd); + r = _bf_cgen_persist(cgen, lock->chain_fd); if (r) { - bf_handle_unpin(cgen->handle, pindir_fd); + bf_handle_unpin(cgen->handle, lock); return bf_err_r(r, "failed to persist cgen for '%s'", cgen->chain->name); } @@ -570,22 +522,15 @@ void bf_cgen_detach(struct bf_cgen *cgen) bf_handle_detach(cgen->handle); } -void bf_cgen_unload(struct bf_cgen *cgen) +void bf_cgen_unload(struct bf_cgen *cgen, struct bf_lock *lock) { - _cleanup_close_ int chain_fd = -1; - assert(cgen); + assert(lock); - chain_fd = _bf_cgen_get_chain_pindir_fd(cgen->chain->name); - if (chain_fd < 0) { - bf_err_r(chain_fd, "failed to open pin directory for '%s'", - cgen->chain->name); - return; - } - - // The chain's pin directory will be removed in bf_cgen_free() - unlinkat(chain_fd, _BF_CTX_PIN_NAME, 0); - bf_handle_unpin(cgen->handle, chain_fd); + /* The chain's pin directory will be removed by bf_lock_release_chain() + * if a `BF_LOCK_WRITE` lock is held. */ + unlinkat(lock->chain_fd, _BF_CTX_PIN_NAME, 0); + bf_handle_unpin(cgen->handle, lock); bf_handle_unload(cgen->handle); } diff --git a/src/libbpfilter/cgen/cgen.h b/src/libbpfilter/cgen/cgen.h index 2475c7e37..6c77a88d0 100644 --- a/src/libbpfilter/cgen/cgen.h +++ b/src/libbpfilter/cgen/cgen.h @@ -15,6 +15,7 @@ struct bf_chain; struct bf_handle; struct bf_hookopts; +struct bf_lock; #define _free_bf_cgen_ __attribute__((cleanup(bf_cgen_free))) @@ -51,15 +52,15 @@ int bf_cgen_new(struct bf_cgen **cgen, struct bf_chain **chain); /** * @brief Allocate and initialize a codegen from a pinned context map. * - * Opens the `bf_ctx` context map pinned in the directory referenced by - * `dir_fd`, reads the serialized cgen data, and deserializes it. + * Opens the `bf_ctx` context map pinned in the chain directory referenced by + * `lock`, reads the serialized cgen data, and deserializes it. * * @param cgen Codegen to allocate and initialize. Can't be NULL. - * @param dir_fd File descriptor of the chain's bpffs pin directory. Must be - * valid. + * @param lock Lock providing the chain directory file descriptor. Must hold + * a valid `chain_fd`. Can't be NULL. * @return 0 on success, or a negative errno value on failure. */ -int bf_cgen_new_from_dir_fd(struct bf_cgen **cgen, int dir_fd); +int bf_cgen_new_from_dir_fd(struct bf_cgen **cgen, struct bf_lock *lock); /** * Free a codegen. @@ -90,9 +91,12 @@ int bf_cgen_pack(const struct bf_cgen *cgen, bf_wpack_t *pack); * * @param cgen Codegen to attach to the kernel. Can't be NULL. * @param hookopts Hook options. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. * @return 0 on success, or negative errno value on failure. */ -int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts); +int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts, + struct bf_lock *lock); /** * Create and load a `bf_program` into the kernel. @@ -102,9 +106,11 @@ int bf_cgen_set(struct bf_cgen *cgen, struct bf_hookopts **hookopts); * loaded into the kernel. * * @param cgen Codegen to load into the kernel. Can't be NULL. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. * @return 0 on success, or negative errno value on failure. */ -int bf_cgen_load(struct bf_cgen *cgen); +int bf_cgen_load(struct bf_cgen *cgen, struct bf_lock *lock); /** * Attach a loaded program to a hook. @@ -116,9 +122,12 @@ int bf_cgen_load(struct bf_cgen *cgen); * * @param cgen Codegen to attach to the kernel. Can't be NULL. * @param hookopts Hook options. Can't be NULL. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. * @return 0 on success, or negative errno value on failure. */ -int bf_cgen_attach(struct bf_cgen *cgen, struct bf_hookopts **hookopts); +int bf_cgen_attach(struct bf_cgen *cgen, struct bf_hookopts **hookopts, + struct bf_lock *lock); /** * Flags to control the behavior of `bf_cgen_update`. @@ -145,10 +154,12 @@ enum bf_cgen_update_flags * @param new_chain Chain containing the new rules, sets, and policy. * Can't be NULL. * @param flags Flags to control update behavior. 0 if no flags. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. * @return 0 on success, or negative errno value on failure. */ int bf_cgen_update(struct bf_cgen *cgen, struct bf_chain **new_chain, - uint32_t flags); + uint32_t flags, struct bf_lock *lock); /** * Detach a program from the kernel. @@ -163,8 +174,10 @@ void bf_cgen_detach(struct bf_cgen *cgen); * Unload a program from the kernel. * * @param cgen Codege to unload. Can't be NULL. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. */ -void bf_cgen_unload(struct bf_cgen *cgen); +void bf_cgen_unload(struct bf_cgen *cgen, struct bf_lock *lock); void bf_cgen_dump(const struct bf_cgen *cgen, prefix_t *prefix); diff --git a/src/libbpfilter/cgen/handle.c b/src/libbpfilter/cgen/handle.c index 0a2b07836..ff70fb33f 100644 --- a/src/libbpfilter/cgen/handle.c +++ b/src/libbpfilter/cgen/handle.c @@ -22,6 +22,7 @@ #include "cgen/prog/link.h" #include "cgen/prog/map.h" +#include "core/lock.h" #define _BF_LINK_NAME "bf_link" @@ -45,18 +46,19 @@ int bf_handle_new(struct bf_handle **handle, const char *prog_name) return 0; } -int bf_handle_new_from_pack(struct bf_handle **handle, int dir_fd, +int bf_handle_new_from_pack(struct bf_handle **handle, struct bf_lock *lock, bf_rpack_node_t node) { _free_bf_handle_ struct bf_handle *_handle = NULL; _cleanup_free_ char *name = NULL; bf_rpack_node_t child, array_node; + int dir_fd; int r; assert(handle); + assert(lock); - if (dir_fd < 0) - return bf_err_r(-EBADFD, "invalid directory FD"); + dir_fd = lock->chain_fd; r = bf_rpack_kv_str(node, "prog_name", &name); if (r) @@ -262,11 +264,15 @@ void bf_handle_dump(const struct bf_handle *handle, prefix_t *prefix) bf_dump_prefix_pop(prefix); } -int bf_handle_pin(struct bf_handle *handle, int dir_fd) +int bf_handle_pin(struct bf_handle *handle, struct bf_lock *lock) { + int dir_fd; int r; assert(handle); + assert(lock); + + dir_fd = lock->chain_fd; r = bf_bpf_obj_pin(handle->prog_name, handle->prog_fd, dir_fd); if (r) { @@ -312,7 +318,7 @@ int bf_handle_pin(struct bf_handle *handle, int dir_fd) } if (handle->link) { - r = bf_link_pin(handle->link, dir_fd); + r = bf_link_pin(handle->link, lock); if (r) { bf_err_r(r, "failed to pin BPF link"); goto err_unpin_all; @@ -322,13 +328,18 @@ int bf_handle_pin(struct bf_handle *handle, int dir_fd) return 0; err_unpin_all: - bf_handle_unpin(handle, dir_fd); + bf_handle_unpin(handle, lock); return r; } -void bf_handle_unpin(struct bf_handle *handle, int dir_fd) +void bf_handle_unpin(struct bf_handle *handle, struct bf_lock *lock) { + int dir_fd; + assert(handle); + assert(lock); + + dir_fd = lock->chain_fd; if (handle->cmap) bf_map_unpin(handle->cmap, dir_fd); @@ -347,7 +358,7 @@ void bf_handle_unpin(struct bf_handle *handle, int dir_fd) } if (handle->link) - bf_link_unpin(handle->link, dir_fd); + bf_link_unpin(handle->link, lock); unlinkat(dir_fd, handle->prog_name, 0); } diff --git a/src/libbpfilter/cgen/handle.h b/src/libbpfilter/cgen/handle.h index 5bddeb5a0..ad0c30d16 100644 --- a/src/libbpfilter/cgen/handle.h +++ b/src/libbpfilter/cgen/handle.h @@ -13,6 +13,7 @@ #include struct bf_link; +struct bf_lock; struct bf_map; struct bf_counter; @@ -72,12 +73,12 @@ int bf_handle_new(struct bf_handle **handle, const char *prog_name); * @brief Allocate and initialize a bf_handle from serialized data. * * @param handle `bf_handle` object to allocate and initialize. Can't be NULL. - * @param dir_fd File descriptor of the directory containing the pinned objects. - * Must be a valid file descriptor. + * @param lock Lock providing the chain directory file descriptor of the + * pinned objects. Must hold a valid `chain_fd`. Can't be NULL. * @param node Node containing the serialized handle data. * @return 0 on success, or a negative errno value on failure. */ -int bf_handle_new_from_pack(struct bf_handle **handle, int dir_fd, +int bf_handle_new_from_pack(struct bf_handle **handle, struct bf_lock *lock, bf_rpack_node_t node); /** @@ -111,24 +112,26 @@ void bf_handle_dump(const struct bf_handle *handle, prefix_t *prefix); /** * @brief Pin the BPF objects to the filesystem. * - * Pins the program and all maps/link to the directory specified by `dir_fd`. - * The link is only pinned if the program is attached. + * Pins the program and all maps/link to the chain directory referenced by + * `lock`. The link is only pinned if the program is attached. * * @param handle Handle containing the BPF objects to pin. Can't be NULL. - * @param dir_fd File descriptor of the directory to pin into. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. * @return 0 on success, or a negative errno value on failure. */ -int bf_handle_pin(struct bf_handle *handle, int dir_fd); +int bf_handle_pin(struct bf_handle *handle, struct bf_lock *lock); /** * @brief Unpin the BPF objects from the filesystem. * - * Unpins all BPF objects from the directory. This function never fails. + * Unpins all BPF objects from the chain directory. This function never fails. * * @param handle Handle containing the BPF objects to unpin. Can't be NULL. - * @param dir_fd File descriptor of the directory containing the pins. + * @param lock Lock providing the chain directory file descriptor. Must hold a + * valid `chain_fd`. Can't be NULL. */ -void bf_handle_unpin(struct bf_handle *handle, int dir_fd); +void bf_handle_unpin(struct bf_handle *handle, struct bf_lock *lock); /** * @brief Get a counter value from the handle's counters map. diff --git a/src/libbpfilter/cgen/prog/link.c b/src/libbpfilter/cgen/prog/link.c index d557c10cd..40cff5e07 100644 --- a/src/libbpfilter/cgen/prog/link.c +++ b/src/libbpfilter/cgen/prog/link.c @@ -23,6 +23,8 @@ #include #include +#include "core/lock.h" + static int _bf_link_try_attach_xdp(int prog_fd, int ifindex, enum bf_hook hook); int bf_link_new(struct bf_link **link, const char *name, enum bf_hook hook, @@ -305,23 +307,21 @@ int bf_link_update(struct bf_link *link, int prog_fd) return r; } -int bf_link_pin(struct bf_link *link, int dir_fd) +int bf_link_pin(struct bf_link *link, struct bf_lock *lock) { int r; assert(link); + assert(lock); - if (dir_fd < 0) - return bf_err_r(-EINVAL, "directory file descriptor is invalid"); - - r = bf_bpf_obj_pin("bf_link", link->fd, dir_fd); + r = bf_bpf_obj_pin("bf_link", link->fd, lock->chain_fd); if (r) return bf_err_r(r, "failed to pin BPF link"); if (link->fd_extra >= 0) { - r = bf_bpf_obj_pin("bf_link_extra", link->fd_extra, dir_fd); + r = bf_bpf_obj_pin("bf_link_extra", link->fd_extra, lock->chain_fd); if (r) { - bf_link_unpin(link, dir_fd); + bf_link_unpin(link, lock); return bf_err_r(r, "failed to pin extra BPF link"); } } @@ -329,27 +329,22 @@ int bf_link_pin(struct bf_link *link, int dir_fd) return 0; } -void bf_link_unpin(struct bf_link *link, int dir_fd) +void bf_link_unpin(struct bf_link *link, struct bf_lock *lock) { int r; - assert(link); - - if (dir_fd < 0) { - bf_err_r(-EINVAL, "directory file descriptor is invalid"); - return; - } + assert(lock); (void)link; - r = unlinkat(dir_fd, "bf_link", 0); + r = unlinkat(lock->chain_fd, "bf_link", 0); if (r < 0 && errno != ENOENT) { // Do not warn on ENOENT, we want the file to be gone! bf_warn_r(errno, "failed to unlink BPF link, assuming the link is not pinned"); } - r = unlinkat(dir_fd, "bf_link_extra", 0); + r = unlinkat(lock->chain_fd, "bf_link_extra", 0); if (r < 0 && errno != ENOENT) { // Do not warn on ENOENT, we want the file to be gone! bf_warn_r( diff --git a/src/libbpfilter/cgen/prog/link.h b/src/libbpfilter/cgen/prog/link.h index 80e1a70bb..04199ad0d 100644 --- a/src/libbpfilter/cgen/prog/link.h +++ b/src/libbpfilter/cgen/prog/link.h @@ -12,6 +12,8 @@ #include #include +struct bf_lock; + /** * BPF link object. */ @@ -116,17 +118,17 @@ int bf_link_update(struct bf_link *link, int prog_fd); * @brief Pin the link to the system. * * @param link Link to pin. Can't be NULL. - * @param dir_fd File descriptor of the directory to pin the link into. Must be - * a valid file descriptor. + * @param lock Lock providing the chain directory file descriptor to pin the + * link into. Must hold a valid `chain_fd`. Can't be NULL. * @return 0 on success, or a negative errno value on error. */ -int bf_link_pin(struct bf_link *link, int dir_fd); +int bf_link_pin(struct bf_link *link, struct bf_lock *lock); /** * @brief Unpin the link from the system. * - * @param link Link to unpin. Can't be NULL. - * @param dir_fd File descriptor of the directory to unpin the link from. Must - * be a valid file descriptor. + * @param link Link to unpin. + * @param lock Lock providing the chain directory file descriptor to unpin the + * link from. Must hold a valid `chain_fd`. Can't be NULL. */ -void bf_link_unpin(struct bf_link *link, int dir_fd); +void bf_link_unpin(struct bf_link *link, struct bf_lock *lock); diff --git a/src/libbpfilter/cli.c b/src/libbpfilter/cli.c index acc9e1dda..b3389f737 100644 --- a/src/libbpfilter/cli.c +++ b/src/libbpfilter/cli.c @@ -21,6 +21,7 @@ #include "cgen/handle.h" #include "cgen/prog/link.h" #include "cgen/prog/map.h" +#include "core/lock.h" static int copy_hookopts(struct bf_hookopts **dest, const struct bf_hookopts *src) @@ -44,19 +45,63 @@ static int copy_hookopts(struct bf_hookopts **dest, return 0; } +/** + * @brief Unload every chain currently pinned in the pin directory. + * + * The caller must already hold a `BF_LOCK_WRITE` lock on the pin directory + * (typically via `bf_lock_init(BF_LOCK_WRITE)`). For each chain entry, a + * per-chain `BF_LOCK_WRITE` lock is acquired (so the chain dir is removed + * by `bf_lock_release_chain` after unload). + */ +static int _bf_ruleset_flush(struct bf_lock *lock) +{ + _free_bf_list_ bf_list *cgens = NULL; + int r; + + assert(lock); + + r = bf_ctx_get_cgens(lock, &cgens); + if (r) + return bf_err_r(r, "failed to discover chains during flush"); + + bf_list_foreach (cgens, cgen_node) { + struct bf_cgen *cgen = bf_list_node_get_data(cgen_node); + + r = bf_lock_acquire_chain(lock, cgen->chain->name, BF_LOCK_WRITE, + false); + if (r) { + bf_warn_r( + r, + "failed to acquire WRITE lock on chain '%s' during flush, skipping", + cgen->chain->name); + continue; + } + + bf_cgen_unload(cgen, lock); + bf_lock_release_chain(lock); + } + + return 0; +} + int bf_ruleset_get(bf_list *chains, bf_list *hookopts, bf_list *counters) { - _clean_bf_list_ bf_list cgens = bf_list_default(NULL, NULL); + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_list_ bf_list *cgens = NULL; _clean_bf_list_ bf_list _chains = bf_list_default_from(*chains); _clean_bf_list_ bf_list _hookopts = bf_list_default_from(*hookopts); _clean_bf_list_ bf_list _counters = bf_list_default_from(*counters); int r; - r = bf_ctx_get_cgens(&cgens); + r = bf_lock_init(&lock, BF_LOCK_READ); + if (r) + return bf_err_r(r, "failed to acquire READ lock for ruleset get"); + + r = bf_ctx_get_cgens(&lock, &cgens); if (r < 0) return bf_err_r(r, "failed to get cgen list"); - bf_list_foreach (&cgens, cgen_node) { + bf_list_foreach (cgens, cgen_node) { struct bf_cgen *cgen = bf_list_node_get_data(cgen_node); _free_bf_chain_ struct bf_chain *chain = NULL; _free_bf_hookopts_ struct bf_hookopts *hookopts_copy = NULL; @@ -105,6 +150,7 @@ int bf_ruleset_get(bf_list *chains, bf_list *hookopts, bf_list *counters) int bf_ruleset_set(bf_list *chains, bf_list *hookopts) { + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); struct bf_list_node *chain_node = bf_list_get_head(chains); struct bf_list_node *hookopts_node = bf_list_get_head(hookopts); int r; @@ -112,7 +158,13 @@ int bf_ruleset_set(bf_list *chains, bf_list *hookopts) if (bf_list_size(chains) != bf_list_size(hookopts)) return -EINVAL; - bf_ctx_flush(); + r = bf_lock_init(&lock, BF_LOCK_WRITE); + if (r) + return bf_err_r(r, "failed to acquire WRITE lock for ruleset set"); + + r = _bf_ruleset_flush(&lock); + if (r) + return r; while (chain_node && hookopts_node) { _free_bf_cgen_ struct bf_cgen *cgen = NULL; @@ -136,19 +188,22 @@ int bf_ruleset_set(bf_list *chains, bf_list *hookopts) if (r) goto err_load; - r = bf_cgen_set(cgen, hookopts_copy ? &hookopts_copy : NULL); + r = bf_lock_acquire_chain(&lock, cgen->chain->name, BF_LOCK_WRITE, + true); if (r) { - bf_err_r(r, "failed to set chain '%s'", cgen->chain->name); + bf_err_r(r, "failed to acquire WRITE lock on chain '%s'", + cgen->chain->name); goto err_load; } - r = bf_ctx_set_cgen(cgen); + r = bf_cgen_set(cgen, hookopts_copy ? &hookopts_copy : NULL, &lock); if (r) { - bf_cgen_unload(cgen); + bf_err_r(r, "failed to set chain '%s'", cgen->chain->name); + bf_lock_release_chain(&lock); goto err_load; } - TAKE_PTR(cgen); + bf_lock_release_chain(&lock); chain_node = bf_list_node_next(chain_node); hookopts_node = bf_list_node_next(hookopts_node); @@ -157,20 +212,26 @@ int bf_ruleset_set(bf_list *chains, bf_list *hookopts) return 0; err_load: - bf_ctx_flush(); + _bf_ruleset_flush(&lock); return r; } int bf_ruleset_flush(void) { - bf_ctx_flush(); + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + int r; - return 0; + r = bf_lock_init(&lock, BF_LOCK_WRITE); + if (r) + return bf_err_r(r, "failed to acquire WRITE lock for ruleset flush"); + + return _bf_ruleset_flush(&lock); } int bf_chain_set(struct bf_chain *chain, struct bf_hookopts *hookopts) { - struct bf_cgen *old_cgen; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_cgen_ struct bf_cgen *old_cgen = NULL; _free_bf_cgen_ struct bf_cgen *new_cgen = NULL; _free_bf_chain_ struct bf_chain *chain_copy = NULL; _free_bf_hookopts_ struct bf_hookopts *hookopts_copy = NULL; @@ -178,6 +239,32 @@ int bf_chain_set(struct bf_chain *chain, struct bf_hookopts *hookopts) assert(chain); + /* bf_chain_set is a namespace mutator: the previous chain (if any) is + * destroyed and a fresh one is published under the same name. Take + * pindir WRITE for the whole operation so the flush-then-load + * sequence is atomic w.r.t. every other libbpfilter caller. */ + r = bf_lock_init(&lock, BF_LOCK_WRITE); + if (r) + return r; + + /* Unload any pre-existing chain under this name, and remove its pindir + * entry so stage-and-rename can publish the new one. */ + r = bf_lock_acquire_chain(&lock, chain->name, BF_LOCK_WRITE, false); + if (r == 0) { + r = bf_ctx_get_cgen(&lock, &old_cgen); + if (r && r != -ENOENT) { + bf_lock_release_chain(&lock); + return r; + } + if (old_cgen) + bf_cgen_unload(old_cgen, &lock); + /* Release drops the chain flock and (because we held WRITE) + * removes the now-empty chain dir. */ + bf_lock_release_chain(&lock); + } else if (r != -ENOENT) { + return r; + } + r = bf_chain_new_from_copy(&chain_copy, chain); if (r) return r; @@ -192,32 +279,22 @@ int bf_chain_set(struct bf_chain *chain, struct bf_hookopts *hookopts) if (r) return r; - old_cgen = bf_ctx_get_cgen(new_cgen->chain->name); - if (old_cgen) - (void)bf_ctx_delete_cgen(old_cgen, true); - - r = bf_cgen_set(new_cgen, hookopts_copy ? &hookopts_copy : NULL); + /* Create the new chain dir via stage-and-rename (I3). */ + r = bf_lock_acquire_chain(&lock, chain->name, BF_LOCK_WRITE, true); if (r) return r; - r = bf_ctx_set_cgen(new_cgen); - if (r) { - bf_cgen_unload(new_cgen); - return r; - } - - TAKE_PTR(new_cgen); - - return 0; + return bf_cgen_set(new_cgen, hookopts_copy ? &hookopts_copy : NULL, &lock); } int bf_chain_get(const char *name, struct bf_chain **chain, struct bf_hookopts **hookopts, bf_list *counters) { + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); _free_bf_chain_ struct bf_chain *_chain = NULL; _free_bf_hookopts_ struct bf_hookopts *_hookopts = NULL; _clean_bf_list_ bf_list _counters = bf_list_default_from(*counters); - struct bf_cgen *cgen; + _free_bf_cgen_ struct bf_cgen *cgen = NULL; int r; assert(name); @@ -225,9 +302,13 @@ int bf_chain_get(const char *name, struct bf_chain **chain, assert(hookopts); assert(counters); - cgen = bf_ctx_get_cgen(name); - if (!cgen) - return bf_err_r(-ENOENT, "chain '%s' not found", name); + r = bf_lock_init_for_chain(&lock, name, BF_LOCK_READ, BF_LOCK_READ, false); + if (r) + return r; + + r = bf_ctx_get_cgen(&lock, &cgen); + if (r) + return r; r = bf_chain_new_from_copy(&_chain, cgen->chain); if (r) @@ -252,14 +333,19 @@ int bf_chain_get(const char *name, struct bf_chain **chain, int bf_chain_prog_fd(const char *name) { - struct bf_cgen *cgen; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_cgen_ struct bf_cgen *cgen = NULL; + int r; - if (!name) - return -EINVAL; + assert(name); - cgen = bf_ctx_get_cgen(name); - if (!cgen) - return bf_err_r(-ENOENT, "failed to find chain '%s'", name); + r = bf_lock_init_for_chain(&lock, name, BF_LOCK_READ, BF_LOCK_READ, false); + if (r) + return r; + + r = bf_ctx_get_cgen(&lock, &cgen); + if (r) + return r; if (cgen->handle->prog_fd == -1) return bf_err_r(-ENODEV, "chain '%s' has no loaded program", name); @@ -269,14 +355,19 @@ int bf_chain_prog_fd(const char *name) int bf_chain_logs_fd(const char *name) { - struct bf_cgen *cgen; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_cgen_ struct bf_cgen *cgen = NULL; + int r; - if (!name) - return -EINVAL; + assert(name); + + r = bf_lock_init_for_chain(&lock, name, BF_LOCK_READ, BF_LOCK_READ, false); + if (r) + return r; - cgen = bf_ctx_get_cgen(name); - if (!cgen) - return bf_err_r(-ENOENT, "failed to find chain '%s'", name); + r = bf_ctx_get_cgen(&lock, &cgen); + if (r) + return r; if (!cgen->handle->lmap) return bf_err_r(-ENOENT, "chain '%s' has no logs buffer", name); @@ -286,50 +377,50 @@ int bf_chain_logs_fd(const char *name) int bf_chain_load(struct bf_chain *chain) { + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); _free_bf_cgen_ struct bf_cgen *cgen = NULL; _free_bf_chain_ struct bf_chain *chain_copy = NULL; int r; assert(chain); - if (bf_ctx_get_cgen(chain->name)) - return bf_err_r(-EEXIST, "chain '%s' already exists", chain->name); - - r = bf_chain_new_from_copy(&chain_copy, chain); + /* chain_load is a namespace mutator: pindir WRITE + chain WRITE on the + * staged inode. Stage-and-rename (I3) returns `-EEXIST` if another + * creator already claimed the name, which replaces the former + * check-then-create sequence atomically. */ + r = bf_lock_init_for_chain(&lock, chain->name, BF_LOCK_WRITE, BF_LOCK_WRITE, + true); if (r) return r; - r = bf_cgen_new(&cgen, &chain_copy); + r = bf_chain_new_from_copy(&chain_copy, chain); if (r) return r; - r = bf_cgen_load(cgen); + r = bf_cgen_new(&cgen, &chain_copy); if (r) return r; - r = bf_ctx_set_cgen(cgen); - if (r) { - bf_cgen_unload(cgen); - return bf_err_r(r, "failed to add cgen to the runtime context"); - } - - TAKE_PTR(cgen); - - return 0; + return bf_cgen_load(cgen, &lock); } int bf_chain_attach(const char *name, const struct bf_hookopts *hookopts) { - struct bf_cgen *cgen; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_cgen_ struct bf_cgen *cgen = NULL; _free_bf_hookopts_ struct bf_hookopts *hookopts_copy = NULL; int r; assert(name); assert(hookopts); - cgen = bf_ctx_get_cgen(name); - if (!cgen) - return bf_err_r(-ENOENT, "chain '%s' does not exist", name); + r = bf_lock_init_for_chain(&lock, name, BF_LOCK_READ, BF_LOCK_WRITE, false); + if (r) + return r; + + r = bf_ctx_get_cgen(&lock, &cgen); + if (r) + return r; if (cgen->handle->link) return bf_err_r(-EBUSY, "chain '%s' is already linked to a hook", name); @@ -342,7 +433,7 @@ int bf_chain_attach(const char *name, const struct bf_hookopts *hookopts) if (r) return r; - r = bf_cgen_attach(cgen, &hookopts_copy); + r = bf_cgen_attach(cgen, &hookopts_copy, &lock); if (r) return bf_err_r(r, "failed to attach codegen to hook"); @@ -351,25 +442,27 @@ int bf_chain_attach(const char *name, const struct bf_hookopts *hookopts) int bf_chain_update(const struct bf_chain *chain) { + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); _free_bf_chain_ struct bf_chain *chain_copy = NULL; - struct bf_cgen *cgen; + _free_bf_cgen_ struct bf_cgen *cgen = NULL; int r; assert(chain); - cgen = bf_ctx_get_cgen(chain->name); - if (!cgen) - return -ENOENT; + r = bf_lock_init_for_chain(&lock, chain->name, BF_LOCK_READ, BF_LOCK_WRITE, + false); + if (r) + return r; - r = bf_chain_new_from_copy(&chain_copy, chain); + r = bf_ctx_get_cgen(&lock, &cgen); if (r) return r; - r = bf_cgen_update(cgen, &chain_copy, 0); + r = bf_chain_new_from_copy(&chain_copy, chain); if (r) return r; - return 0; + return bf_cgen_update(cgen, &chain_copy, 0, &lock); } static int copy_set(struct bf_set **dest, const struct bf_set *src) @@ -409,9 +502,10 @@ static int copy_set(struct bf_set **dest, const struct bf_set *src) int bf_chain_update_set(const char *name, const struct bf_set *to_add, const struct bf_set *to_remove) { + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); _free_bf_chain_ struct bf_chain *new_chain = NULL; struct bf_set *dest_set = NULL; - struct bf_cgen *cgen; + _free_bf_cgen_ struct bf_cgen *cgen = NULL; _free_bf_set_ struct bf_set *add_copy = NULL; _free_bf_set_ struct bf_set *remove_copy = NULL; int r; @@ -423,9 +517,13 @@ int bf_chain_update_set(const char *name, const struct bf_set *to_add, if (!bf_streq(to_add->name, to_remove->name)) return bf_err_r(-EINVAL, "to_add->name must match to_remove->name"); - cgen = bf_ctx_get_cgen(name); - if (!cgen) - return bf_err_r(-ENOENT, "chain '%s' does not exist", name); + r = bf_lock_init_for_chain(&lock, name, BF_LOCK_READ, BF_LOCK_WRITE, false); + if (r) + return r; + + r = bf_ctx_get_cgen(&lock, &cgen); + if (r) + return r; r = bf_chain_new_from_copy(&new_chain, cgen->chain); if (r) @@ -452,7 +550,7 @@ int bf_chain_update_set(const char *name, const struct bf_set *to_add, return bf_err_r(r, "failed to calculate set difference"); r = bf_cgen_update(cgen, &new_chain, - BF_FLAG(BF_CGEN_UPDATE_PRESERVE_COUNTERS)); + BF_FLAG(BF_CGEN_UPDATE_PRESERVE_COUNTERS), &lock); if (r) return bf_err_r(r, "failed to update chain with new set data"); @@ -461,13 +559,24 @@ int bf_chain_update_set(const char *name, const struct bf_set *to_add, int bf_chain_flush(const char *name) { - struct bf_cgen *cgen; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_cgen_ struct bf_cgen *cgen = NULL; + int r; assert(name); - cgen = bf_ctx_get_cgen(name); - if (!cgen) - return -ENOENT; + /* chain_flush removes the chain dir from the pindir namespace, so it + * needs pindir WRITE (I2). */ + r = bf_lock_init_for_chain(&lock, name, BF_LOCK_WRITE, BF_LOCK_WRITE, + false); + if (r) + return r; + + r = bf_ctx_get_cgen(&lock, &cgen); + if (r) + return r; + + bf_cgen_unload(cgen, &lock); - return bf_ctx_delete_cgen(cgen, true); + return 0; } diff --git a/src/libbpfilter/core/lock.c b/src/libbpfilter/core/lock.c new file mode 100644 index 000000000..7acaf8a50 --- /dev/null +++ b/src/libbpfilter/core/lock.c @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +/* `renameat2` and `RENAME_NOREPLACE` require _GNU_SOURCE from glibc. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "core/lock.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define BF_PERM_755 (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) + +/** Bounded retry count for the "recheck-after-flock" loop (P1). Each failed + * attempt corresponds to a completed `unlink + recreate` by another + * `BF_LOCK_WRITE` holder, so this budget is extremely generous in practice. */ +#define BF_LOCK_MAX_RETRIES 8 + +/** Bounded retry count for the staging name collision loop. Collisions are + * astronomically rare given the random suffix, so a small budget suffices. */ +#define BF_LOCK_STAGING_NAME_RETRIES 4 + +/** Number of random bytes pulled from `/dev/urandom` for the staging suffix. */ +#define BF_LOCK_STAGING_RAND_BYTES 8 + +/** + * @brief Apply an `flock(2)` of the requested mode on `fd`. + * + * `BF_LOCK_NONE` is a no-op; `BF_LOCK_READ` maps to `LOCK_SH`; `BF_LOCK_WRITE` + * maps to `LOCK_EX`. All requests are non-blocking (`LOCK_NB`): contention + * returns `-EWOULDBLOCK` immediately rather than waiting. + * + * @param fd File descriptor to lock. + * @param mode Locking mode, see `bf_lock_mode`. + * @return 0 on success, or a negative errno value on failure. + */ +static int _bf_flock(int fd, enum bf_lock_mode mode) +{ + int op = LOCK_NB; + + if (mode >= _BF_LOCK_MAX) + return -EINVAL; + + if (fd < 0) + return -EBADFD; + + if (mode == BF_LOCK_NONE) + return 0; + + op |= (mode == BF_LOCK_WRITE) ? LOCK_EX : LOCK_SH; + + if (flock(fd, op) < 0) + return -errno; + + return 0; +} + +/** + * @brief Fill `buf` with a unique staging name. + * + * The name has the form `_` where `` is a hex-encoded + * random suffix pulled from `/dev/urandom`. Uniqueness isn't strictly required + * (collisions cause `mkdirat(EEXIST)` and are retried by the caller), but a + * unique name avoids any chance of contention on the staging flock. + * + * If `/dev/urandom` cannot be read, fall back to a name based on `pid` and + * `time(NULL)`; the caller's retry loop will paper over the rare collision + * that this fallback could produce. + * + * @param buf Buffer to write the staging name into. + * @param size Size of `buf`. + */ +static void _bf_make_staging_name(char *buf, size_t size) +{ + unsigned char rand[BF_LOCK_STAGING_RAND_BYTES] = {0}; + char hex[(BF_LOCK_STAGING_RAND_BYTES * 2) + 1]; + _cleanup_close_ int fd = -1; + ssize_t n = -1; + + fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC); + if (fd >= 0) + n = read(fd, rand, sizeof(rand)); + + if (n != (ssize_t)sizeof(rand)) { + /* Fallback: derive bytes from `time(NULL)` so two processes that + * both fail to read /dev/urandom in the same second still differ on + * `pid`. Collisions are handled by the caller's retry loop. */ + unsigned long fallback = (unsigned long)time(NULL); + for (size_t i = 0; i < sizeof(rand); ++i) + rand[i] = (unsigned char)(fallback >> (i * 8)); + } + + for (size_t i = 0; i < sizeof(rand); ++i) + (void)snprintf(&hex[i * 2], 3, "%02x", rand[i]); + + /* bpffs rejects names starting with '.', so the prefix and format must + * stick to [a-zA-Z0-9_-]. */ + (void)snprintf(buf, size, "%s%d_%s", BF_LOCK_STAGING_PREFIX, (int)getpid(), + hex); +} + +/** + * @brief Stage-and-rename primitive (I3). + * + * Create a uniquely-named staging directory under `pindir_fd`, open it, + * acquire an exclusive flock on it, then atomically publish it as `name` + * via `renameat2(RENAME_NOREPLACE)`. + * + * On success, returns a locked file descriptor referring to the inode now + * reachable as `/`. + * + * On failure, any state created (staging dir) is cleaned up; no side + * effects leak out. + * @param pindir_fd File descriptor of the pin directory. + * @param name Name of the directory (in the pin directory) to open. + * @return The open and locked file descriptor on success, or a negative errno + * value on failure. + */ +static int _bf_lock_stage_and_publish(int pindir_fd, const char *name) +{ + char staging[NAME_MAX]; + _cleanup_close_ int fd = -1; + int r; + + assert(name); + + /* 1. Create a unique staging directory. Retry a small number of times + * if we happen to collide with our own prior staging dirs (should + * be astronomically rare given the random suffix). */ + for (int attempt = 0; attempt < BF_LOCK_STAGING_NAME_RETRIES; ++attempt) { + _bf_make_staging_name(staging, sizeof(staging)); + if (mkdirat(pindir_fd, staging, BF_PERM_755) == 0) + break; + if (errno != EEXIST) { + return bf_err_r(-errno, + "failed to create staging dir '%s' under pindir", + staging); + } + if (attempt == BF_LOCK_STAGING_NAME_RETRIES - 1) { + return bf_err_r( + -EAGAIN, + "failed to generate a unique staging dir name after retries"); + } + } + + /* 2. Open the staging directory. */ + fd = openat(pindir_fd, staging, O_DIRECTORY); + if (fd < 0) { + r = -errno; + (void)unlinkat(pindir_fd, staging, AT_REMOVEDIR); + return bf_err_r(r, "failed to open staging dir '%s'", staging); + } + + /* 3. Exclusively lock the staging inode. Cannot contend: we own the + * unique staging name. */ + r = _bf_flock(fd, BF_LOCK_WRITE); + if (r) { + (void)unlinkat(pindir_fd, staging, AT_REMOVEDIR); + return bf_err_r(r, "failed to flock staging dir '%s'", staging); + } + + /* 4. Publish atomically. `RENAME_NOREPLACE` ensures we lose cleanly to + * any concurrent creator that already claimed `name`. */ + if (renameat2(pindir_fd, staging, pindir_fd, name, RENAME_NOREPLACE) < 0) { + r = -errno; + /* Staging dir was never published; safe to remove (we hold its + * flock, nobody else can observe or lock it). */ + (void)unlinkat(pindir_fd, staging, AT_REMOVEDIR); + return r; + } + + return TAKE_FD(fd); +} + +/** + * @brief Open an existing chain dir with the recheck-after-flock protocol + * (P1). + * + * Between `openat` and `flock`, the name might be unlinked and recreated by + * another `BF_LOCK_WRITE` holder, which would leave us holding a lock on an + * orphaned inode. Detect this by comparing the inode we locked against the + * one currently reachable via the name, and retry on mismatch. + * + * On success, returns a locked file descriptor whose inode is guaranteed to + * be the one currently bound to `name`. + * @param pindir_fd File descriptor of the directory containing `name`. + * @param name Name of the directory to open and lock. + * @param mode Locking mode for `name`. + * @return Open and locked file descriptor to `name` in `pindir_fd`, or a + * negative errno value on failure. + */ +static int _bf_lock_open_existing(int pindir_fd, const char *name, + enum bf_lock_mode mode) +{ + int r; + + assert(name); + + for (int attempt = 0; attempt < BF_LOCK_MAX_RETRIES; ++attempt) { + _cleanup_close_ int fd = -1; + struct stat open_st; + struct stat live_st; + + fd = openat(pindir_fd, name, O_DIRECTORY); + if (fd < 0) + return -errno; + + r = _bf_flock(fd, mode); + if (r) + return r; + + if (fstat(fd, &open_st) < 0) + return -errno; + + if (fstatat(pindir_fd, name, &live_st, AT_SYMLINK_NOFOLLOW) < 0) { + /* Name is gone (ENOENT) or inaccessible. Retry. */ + continue; + } + + if (open_st.st_dev == live_st.st_dev && + open_st.st_ino == live_st.st_ino) + return TAKE_FD(fd); + + /* Mismatch: the name now resolves to a different inode. Our fd is + * pinned to the orphaned inode; drop it and retry. The flock is + * released when fd is closed. */ + } + + return bf_err_r( + -EAGAIN, + "failed to stably open chain '%s' after %d retries; likely extreme contention", + name, BF_LOCK_MAX_RETRIES); +} + +int bf_lock_init(struct bf_lock *lock, enum bf_lock_mode mode) +{ + _clean_bf_lock_ struct bf_lock _lock = bf_lock_default(); + const char *bpffs_path; + int r; + + assert(lock); + + bpffs_path = bf_ctx_get_bpffs_path(); + if (!bpffs_path) + return bf_err_r(-EINVAL, "context is not initialized"); + + _lock.bpffs_fd = bf_opendir(bpffs_path); + if (_lock.bpffs_fd < 0) { + return bf_err_r(_lock.bpffs_fd, "failed to open bpffs at %s", + bpffs_path); + } + + /* Create the pin directory lazily. Per I1, it is never removed by the + * library, so subsequent `openat` calls always see the same inode. */ + _lock.pindir_fd = bf_opendir_at(_lock.bpffs_fd, "bpfilter", true); + if (_lock.pindir_fd < 0) { + return bf_err_r(_lock.pindir_fd, + "failed to open pin directory %s/bpfilter", bpffs_path); + } + + r = _bf_flock(_lock.pindir_fd, mode); + if (r) + return r; + + _lock.pindir_lock = mode; + + bf_swap(*lock, _lock); + + return 0; +} + +int bf_lock_init_for_chain(struct bf_lock *lock, const char *name, + enum bf_lock_mode pindir_mode, + enum bf_lock_mode chain_mode, bool create) +{ + _clean_bf_lock_ struct bf_lock _lock = bf_lock_default(); + int r; + + assert(lock); + assert(name); + + if (create && pindir_mode != BF_LOCK_WRITE) { + return bf_err_r( + -EINVAL, + "creating a chain requires BF_LOCK_WRITE on the pin directory"); + } + + r = bf_lock_init(&_lock, pindir_mode); + if (r) + return r; + + r = bf_lock_acquire_chain(&_lock, name, chain_mode, create); + if (r) + return r; + + bf_swap(*lock, _lock); + + return 0; +} + +void bf_lock_cleanup(struct bf_lock *lock) +{ + assert(lock); + + // Quick exit if `lock` wasn't initialized + if (lock->bpffs_fd < 0) + return; + + bf_lock_release_chain(lock); + + /* Per I1, do NOT remove the pin directory. It persists for the + * lifetime of the bpffs mount. */ + closep(&lock->pindir_fd); + lock->pindir_lock = BF_LOCK_NONE; + + closep(&lock->bpffs_fd); +} + +int bf_lock_acquire_chain(struct bf_lock *lock, const char *name, + enum bf_lock_mode mode, bool create) +{ + _cleanup_free_ char *_name = NULL; + _cleanup_close_ int chain_fd = -1; + + assert(lock); + assert(name); + + if (lock->bpffs_fd < 0 || lock->pindir_fd < 0) { + return bf_err_r( + -EBADFD, + "attempting to acquire a chain lock on an invalid bf_lock"); + } + + if (lock->chain_fd >= 0) { + return bf_err_r(-EINVAL, "bf_lock already locks chain '%s'", + lock->chain_name); + } + + if (create) { + if (mode != BF_LOCK_WRITE) { + return bf_err_r( + -EINVAL, + "creating a chain requires BF_LOCK_WRITE on the chain directory"); + } + if (lock->pindir_lock != BF_LOCK_WRITE) { + return bf_err_r( + -EINVAL, + "creating a chain requires BF_LOCK_WRITE on the pin directory"); + } + } + + _name = strdup(name); + if (!_name) + return -ENOMEM; + + if (create) { + /* Stage-and-rename (I3). Returns a locked fd already reachable at + * the final name. */ + chain_fd = _bf_lock_stage_and_publish(lock->pindir_fd, _name); + if (chain_fd < 0) + return chain_fd; + } else { + /* Recheck-after-flock (P1). Returns a locked fd for the live + * inode of `name`, or an error. */ + chain_fd = _bf_lock_open_existing(lock->pindir_fd, _name, mode); + if (chain_fd < 0) + return chain_fd; + } + + lock->chain_fd = TAKE_FD(chain_fd); + lock->chain_name = TAKE_PTR(_name); + lock->chain_lock = mode; + + return 0; +} + +void bf_lock_release_chain(struct bf_lock *lock) +{ + assert(lock); + + if (lock->bpffs_fd < 0 || lock->pindir_fd < 0) { + bf_warn("attempting to release a chain lock on an invalid bf_lock"); + return; + } + + if (lock->chain_fd < 0) + return; + + /* Only WRITE locks will be used to create or modify a chain, meaning + * if a READ or NONE lock was held on a chain directory, that directory + * hasn't been modified during the lifetime of the lock. So there is no + * need to attempt to remove it. + * + * Per I2, this removal is only race-free against concurrent readers if + * the caller also holds BF_LOCK_WRITE on the pin directory. The + * locking matrix documented in `lock.h` ensures this. */ + if (lock->chain_lock == BF_LOCK_WRITE) + (void)unlinkat(lock->pindir_fd, lock->chain_name, AT_REMOVEDIR); + + closep(&lock->chain_fd); + + freep((void *)&lock->chain_name); + lock->chain_lock = BF_LOCK_NONE; +} diff --git a/src/libbpfilter/core/lock.h b/src/libbpfilter/core/lock.h new file mode 100644 index 000000000..f884d5491 --- /dev/null +++ b/src/libbpfilter/core/lock.h @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +#pragma once + +#include + +/** + * @file lock.h + * + * Lock object(s) to prevent concurrent access to the ruleset and/or a + * chain. + * + * Bundles the open file descriptors for `$BPFFS`, `$BPFFS/bpfilter` and + * an optional chain directory under it, and locks them if requested. + * + * # Usage + * + * A `bf_lock` should be defined with `bf_lock_default()` to ensure it contains + * valid defaults, which prevents API misuse and ensures `bf_lock_cleanup()` can + * be called safely. + * + * The `bf_lock` can then be initialized using `bf_lock_init()` to only open + * and lock the pin directory (`$BPFFS/bpfilter`). If `BF_LOCK_NONE` is used, + * the pin directory is not locked (useful if the caller already holds a + * compatible lock on it). + * + * Alternatively, you can open (and lock) both the pin directory and a chain + * directory using `bf_lock_init_for_chain()`. The caller chooses the lock mode + * for both the pin directory (`pindir_mode`) and the chain directory + * (`chain_mode`) independently. If the chain directory doesn't exist yet, + * `create=true` will create it atomically via stage-and-rename (see + * "Invariants" below). + * + * `bf_lock` is cleaned up using `bf_lock_cleanup()`: it is safe to call on a + * properly defined lock (using `bf_lock_default()`), an initialized lock, or an + * already cleaned-up lock (i.e. `bf_lock_cleanup()` has already been called). + * Use the `_clean_bf_lock_` variable attribute for automatic cleanup. + * + * # Locking matrix + * + * Operations on the pin directory and the chain directories should follow + * this locking policy: + * + * Operation | Object | Pindir lock | Chain dir lock + * -----------|---------|-------------|--------------------------- + * Read | Ruleset | `READ` | `READ` (per chain, in loop) + * Read | Chain | `READ` | `READ` + * Write | Ruleset | `WRITE` | `WRITE` (per chain, in loop) + * Write | Chain | `READ` | `WRITE` + * + * Rationale: + * - Pindir `WRITE` is required for any operation that mutates the pin + * directory namespace (creating, removing, or renaming an entry). This + * excludes all other pindir lockers (readers and content mutators). + * - Pindir `READ` is sufficient for operations that only resolve existing + * names (pure readers and content mutators). It is compatible with other + * readers/content mutators on *different* chains but mutually exclusive + * with pindir `WRITE`. + * - Chain `WRITE` is required for any mutation of a chain directory's + * contents. Chain `READ` is compatible with other chain readers. + * + * Pindir `WRITE` is mutually exclusive with every other pindir lock, so it + * should be reserved for namespace-level mutations. For single-chain content + * mutations, take pindir `READ` + chain `WRITE`; for single-chain reads, + * take pindir `READ` + chain `READ`. + * + * Ruleset-level operations iterate over every chain and must take a chain + * lock around each per-chain step (using `bf_lock_acquire_chain()` / + * `bf_lock_release_chain()`). + * + * # Invariants + * + * The locking scheme relies on three invariants enforced by the `bf_lock` + * module: + * - **I1: pindir is immortal.** The pin directory `$BPFFS/bpfilter/` is + * created lazily by `bf_lock_init()` and never removed by the library. + * This prevents a "remove under reader" race where one process would + * `unlinkat` the pindir between another process's `openat` and `flock`, + * leaving the second process holding a lock on an orphaned inode. + * - **I2: chain dirs are only removed under `WRITE`.** A chain directory + * can only be removed while its owner holds `BF_LOCK_WRITE` on the chain + * itself **and** `BF_LOCK_WRITE` on the pin directory. + * `bf_lock_release_chain()` only attempts removal when the released chain + * lock is `BF_LOCK_WRITE`; callers must also hold the pindir `WRITE` lock + * for the removal to be race-free against concurrent readers (this is + * ensured by the locking matrix above). + * - **I3: chain dirs are created atomically.** Creation goes through a + * "stage and rename" protocol: a uniquely-named staging directory is + * created and locked first, then atomically published to its final name + * via `renameat2(RENAME_NOREPLACE)`. Two concurrent creators therefore + * cannot step on each other: the loser's `renameat2()` returns `EEXIST` + * and it rolls back its own staging directory. The winner's chain + * directory is never touched by the loser's cleanup. + * + * Ruleset-level operations iterate over every chain and must take a chain + * lock around each per-chain step (using `bf_lock_acquire_chain` / + * `bf_lock_release_chain`): the pindir lock alone does not protect against + * a chain-level writer that holds only the chain's own `WRITE` lock. + * + * # API + */ + +/** + * @brief File lock mode used by `bf_lock`. + */ +enum bf_lock_mode +{ + /// `BF_LOCK_NONE` skips locking entirely (caller already holds a sufficient + /// lock, e.g. an exclusive lock on a parent directory). + BF_LOCK_NONE, + + /// `BF_LOCK_READ` requests a shared lock (multiple readers allowed). + BF_LOCK_READ, + + /// `BF_LOCK_WRITE` requests an exclusive lock (single writer, no readers). + BF_LOCK_WRITE, + _BF_LOCK_MAX, +}; + +/** + * @warning The `bf_lock` structure should only be modified by the locking API, + * not directly, though callers can read any field safely (e.g. file + * descriptors). + */ +struct bf_lock +{ + /// File descriptor of the bpffs directory, -1 if unset. + int bpffs_fd; + + /// File descriptor of the pin directory (`$BPFFS/bpfilter`), -1 if unset. + int pindir_fd; + + /// Lock mode held on `pindir_fd`; `BF_LOCK_NONE` if unlocked. + enum bf_lock_mode pindir_lock; + + /// File descriptor of the chain directory, -1 if unset. + int chain_fd; + + /// Name of the open chain, only valid when `chain_fd` is set. + char *chain_name; + + /// Lock mode held on `chain_fd`; `BF_LOCK_NONE` if unlocked or unset. + enum bf_lock_mode chain_lock; +}; + +/** + * @brief Assign sane defaults to a `bf_lock` object. + * + * This macro should always be used for a `bf_lock` object with the + * `_clean_bf_lock_` attribute. + * + * @return A `bf_lock` object with valid defaults. + */ +#define bf_lock_default() \ + ((struct bf_lock) { \ + .bpffs_fd = -1, \ + .pindir_fd = -1, \ + .pindir_lock = BF_LOCK_NONE, \ + .chain_fd = -1, \ + .chain_name = NULL, \ + .chain_lock = BF_LOCK_NONE, \ + }) + +/** Prefix for staging names used by I3. Callers that walk the pindir (e.g. + * `bf_ctx_get_cgens`) must skip entries with this prefix. + * + * The prefix cannot start with a `.` because bpffs rejects `mkdir` for + * names starting with a dot. It uses double underscore + "bf_staging_" + * to minimise the chance of colliding with a user-chosen chain name: + * the lexer that parses chain names accepts `[a-zA-Z0-9_]+` so a user + * could technically create a chain with the same prefix, but in practice + * they won't. */ +#define BF_LOCK_STAGING_PREFIX "__bf_staging_" + +/// Variable attribute to automatically cleanup a `bf_lock` object. +#define _clean_bf_lock_ __attribute__((cleanup(bf_lock_cleanup))) + +/** + * @brief Open and lock the pin directory. + * + * The pin directory (`$BPFFS/bpfilter`) is created if it doesn't exist. + * Because of I1 (see file header), it is never removed by the library, + * so the inode this function opens is stable for the lifetime of the + * bpffs mount. + * + * @pre + * - The runtime context has been initialized. + * - `lock` is not NULL, and contains sane defaults (see `bf_lock_default()`). + * @post + * - On success: `lock` holds a valid file descriptor on the bpffs, a valid + * file descriptor on the pin directory, and an `flock(2)` of mode `mode` + * on the pin directory. `lock->pindir_lock == mode`. + * - On failure: `lock` is unchanged. + * + * @param lock Handle to populate. Must be initialised via `bf_lock_default()`. + * @param mode Lock mode for the pin directory. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_lock_init(struct bf_lock *lock, enum bf_lock_mode mode); + +/** + * @brief Open and lock the pin directory and a chain directory. + * + * Convenience wrapper that chains `bf_lock_init()` + `bf_lock_acquire_chain()`. + * The caller controls the lock mode for the pin directory (`pindir_mode`) + * and the chain directory (`chain_mode`) independently, per the locking + * matrix. + * + * If the chain directory doesn't exist and `create=true`, it is created + * atomically via stage-and-rename (I3). Creating a chain directory requires + * `pindir_mode == BF_LOCK_WRITE` and `chain_mode == BF_LOCK_WRITE`. + * + * @note If you already own a lock on the pin directory, use + * `bf_lock_acquire_chain()` instead. + * + * @pre + * - The runtime context has been initialized. + * - `lock` is not NULL, and contains sane defaults (see `bf_lock_default()`). + * - `name` is not NULL. + * - `create == true` implies `pindir_mode == BF_LOCK_WRITE` and + * `chain_mode == BF_LOCK_WRITE`. + * @post + * - On success: `lock` holds file descriptors on the bpffs, the pin + * directory (locked with `pindir_mode`), and the chain directory + * (locked with `chain_mode`). `lock->chain_name == name` (owned copy). + * - On failure: `lock` is unchanged. + * + * @param lock Lock object to initialize. + * @param name Name of the chain to lock. + * @param pindir_mode Lock mode for the pin directory. + * @param chain_mode Lock mode for the chain directory. + * @param create If true, create the chain directory if it doesn't exist. + * Requires `pindir_mode == BF_LOCK_WRITE` and + * `chain_mode == BF_LOCK_WRITE`. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_lock_init_for_chain(struct bf_lock *lock, const char *name, + enum bf_lock_mode pindir_mode, + enum bf_lock_mode chain_mode, bool create); + +/** + * @brief Clean up resources held by a lock. + * + * Releases every lock held by `lock`, closes the open file descriptors, + * and removes the chain directory if `BF_LOCK_WRITE` was held on it + * (it might now be empty; `unlinkat(AT_REMOVEDIR)` silently fails if it + * isn't). + * + * Per invariant I1, this function does **not** remove the pin directory + * itself. + * + * This function can be called if `lock` has been assigned sensible defaults + * (using `bf_lock_default`), initialized (using `bf_lock_init*`), or cleaned + * up (using `bf_lock_cleanup`); it is idempotent. + * + * @pre + * - `lock` is not NULL, and is in a valid state. + * @post + * - `lock` is in the default state (all fds are -1, all modes are + * `BF_LOCK_NONE`, `chain_name == NULL`). + * + * @param lock Handle to clean up. + */ +void bf_lock_cleanup(struct bf_lock *lock); + +/** + * @brief Lock a chain directory on an existing pin directory lock. + * + * `lock` must have been successfully initialised by `bf_lock_init` + * (i.e. `bpffs_fd` and `pindir_fd` are valid) and must not already hold a + * chain lock. Depending on `create`: + * - `create == false`: open and lock the existing chain directory. If the + * chain directory doesn't exist, returns `-ENOENT`. Uses the + * "recheck-after-flock" protocol (P1) to detect and retry against + * a concurrent `unlink + recreate` of the name. + * - `create == true`: create the chain directory atomically. Internally + * stages the new directory under a unique `.staging.*` name, acquires + * `BF_LOCK_WRITE` on the staged inode, then publishes it via + * `renameat2(RENAME_NOREPLACE)`. Requires `mode == BF_LOCK_WRITE` and + * the caller must hold `BF_LOCK_WRITE` on the pin directory. If another + * process created the chain first, returns `-EEXIST`. + * + * If creating, opening, or locking the directory fails, `lock` is left + * unchanged. + * + * @pre + * - `lock` is not NULL, has been initialized, and doesn't hold a chain lock. + * - `name` is not NULL. + * - `create == true` implies `mode == BF_LOCK_WRITE` and + * `lock->pindir_lock == BF_LOCK_WRITE`. + * @post + * - On success: `lock->chain_fd` is a valid open (and locked with `mode`) + * file descriptor to the chain directory. `lock->chain_name` is a + * heap-allocated copy of `name`, owned by `lock`. + * - On failure: `lock` is unchanged and remains in a valid state. + * + * @param lock Initialized `bf_lock`. + * @param name Name of the chain to acquire. + * @param mode Lock mode for the chain directory. + * @param create If true, create the chain directory atomically if it + * doesn't exist. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_lock_acquire_chain(struct bf_lock *lock, const char *name, + enum bf_lock_mode mode, bool create); + +/** + * @brief Release a chain lock. + * + * Closes the chain file descriptor. If `BF_LOCK_WRITE` was held on the + * chain, also attempts to remove the (possibly empty) chain pin directory + * via `unlinkat(AT_REMOVEDIR)`; the removal silently no-ops if the + * directory isn't empty. Callers relying on that removal being + * race-free against concurrent readers must also hold the pindir + * `BF_LOCK_WRITE` (I2). + * + * If `lock` doesn't hold a chain lock, this is a no-op. + * + * @pre + * - `lock` is not NULL, has been initialized. + * @post + * - `lock` no longer holds a chain lock; `chain_fd == -1`, + * `chain_name == NULL`, `chain_lock == BF_LOCK_NONE`. + * - Other fields (bpffs/pindir fds and locks) are unchanged. + * + * @param lock Handle to release the chain from. + */ +void bf_lock_release_chain(struct bf_lock *lock); diff --git a/src/libbpfilter/ctx.c b/src/libbpfilter/ctx.c index 3221ed998..ead6b344d 100644 --- a/src/libbpfilter/ctx.c +++ b/src/libbpfilter/ctx.c @@ -25,6 +25,7 @@ #include #include "cgen/cgen.h" +#include "core/lock.h" #define _free_bf_ctx_ __attribute__((cleanup(_bf_ctx_free))) @@ -39,10 +40,6 @@ struct bf_ctx /// BPF token file descriptor int token_fd; - int lock_fd; - - bf_list cgens; - struct bf_elfstub *stubs[_BF_ELFSTUB_MAX]; /// Pass a token to BPF system calls, obtained from bpffs. @@ -114,7 +111,6 @@ static int _bf_ctx_new(struct bf_ctx **ctx, bool with_bpf_token, _ctx->with_bpf_token = with_bpf_token; _ctx->bpffs_path = bpffs_path; _ctx->verbose = verbose; - _ctx->lock_fd = -1; _ctx->token_fd = -1; if (_ctx->with_bpf_token) { @@ -136,8 +132,6 @@ static int _bf_ctx_new(struct bf_ctx **ctx, bool with_bpf_token, _ctx->token_fd = TAKE_FD(token_fd); } - _ctx->cgens = bf_list_default(bf_cgen_free, bf_cgen_pack); - for (enum bf_elfstub_id id = 0; id < _BF_ELFSTUB_MAX; ++id) { r = bf_elfstub_new(&_ctx->stubs[id], id); if (r) @@ -165,8 +159,6 @@ static void _bf_ctx_free(struct bf_ctx **ctx) return; closep(&(*ctx)->token_fd); - closep(&(*ctx)->lock_fd); - bf_list_clean(&(*ctx)->cgens); for (enum bf_elfstub_id id = 0; id < _BF_ELFSTUB_MAX; ++id) bf_elfstub_free(&(*ctx)->stubs[id]); @@ -187,98 +179,9 @@ static void _bf_ctx_dump(const struct bf_ctx *ctx, prefix_t *prefix) DUMP(prefix, "token_fd: %d", ctx->token_fd); - // Codegens - DUMP(bf_dump_prefix_last(prefix), "cgens: bf_list[%lu]", - bf_list_size(&ctx->cgens)); - bf_dump_prefix_push(prefix); - bf_list_foreach (&ctx->cgens, cgen_node) { - struct bf_cgen *cgen = bf_list_node_get_data(cgen_node); - - if (bf_list_is_tail(&ctx->cgens, cgen_node)) - bf_dump_prefix_last(prefix); - - bf_cgen_dump(cgen, prefix); - } - bf_dump_prefix_pop(prefix); - bf_dump_prefix_pop(prefix); } -/** - * See @ref bf_ctx_get_cgen for details. - */ -static struct bf_cgen *_bf_ctx_get_cgen(const struct bf_ctx *ctx, - const char *name) -{ - assert(ctx); - assert(name); - - bf_list_foreach (&ctx->cgens, cgen_node) { - struct bf_cgen *cgen = bf_list_node_get_data(cgen_node); - - if (bf_streq(cgen->chain->name, name)) - return cgen; - } - - return NULL; -} - -/** - * See @ref bf_ctx_get_cgens for details. - */ -static int _bf_ctx_get_cgens(const struct bf_ctx *ctx, bf_list *cgens) -{ - _clean_bf_list_ bf_list _cgens = bf_list_default(NULL, NULL); - int r; - - assert(ctx); - assert(cgens); - - bf_list_foreach (&ctx->cgens, cgen_node) { - r = bf_list_add_tail(&_cgens, bf_list_node_get_data(cgen_node)); - if (r) - return r; - } - - *cgens = bf_list_move(_cgens); - - return 0; -} - -/** - * See @ref bf_ctx_set_cgen for details. - */ -static int _bf_ctx_set_cgen(struct bf_ctx *ctx, struct bf_cgen *cgen) -{ - assert(ctx); - assert(cgen); - - if (_bf_ctx_get_cgen(ctx, cgen->chain->name)) - return bf_err_r(-EEXIST, "codegen already exists in context"); - - return bf_list_add_tail(&ctx->cgens, cgen); -} - -static int _bf_ctx_delete_cgen(struct bf_ctx *ctx, struct bf_cgen *cgen, - bool unload) -{ - bf_list_foreach (&ctx->cgens, cgen_node) { - struct bf_cgen *_cgen = bf_list_node_get_data(cgen_node); - - if (_cgen != cgen) - continue; - - if (unload) - bf_cgen_unload(_cgen); - - bf_list_delete(&ctx->cgens, cgen_node); - - return 0; - } - - return -ENOENT; -} - static void _bf_free_dir(DIR **dir) { if (!*dir) @@ -291,123 +194,83 @@ static void _bf_free_dir(DIR **dir) #define _free_dir_ __attribute__((__cleanup__(_bf_free_dir))) /** - * @brief Discover and restore chains from bpffs context maps. + * @brief Sweep leftover staging directories from a previous run. + * + * `core/lock.c` creates uniquely-named `.staging.*` directories while + * publishing new chain dirs via `renameat2(RENAME_NOREPLACE)`. If a + * process crashes between the `mkdirat` and the `renameat2`, the staging + * dir is orphaned. * - * Iterates subdirectories under `{bpffs}/bpfilter/`, deserializing each - * chain's `bf_ctx` context map into a `bf_cgen` and adding it to the - * global context. The global context must already be initialized via - * `bf_ctx_setup` before calling this function. + * This function walks the pindir under `BF_LOCK_WRITE` and removes any + * `.staging.*` entry whose `flock(LOCK_EX | LOCK_NB)` succeeds (meaning + * nobody currently owns it). Live staging dirs are left alone. * - * @return 0 on success, or a negative errno value on failure. + * Runs once from `bf_ctx_setup()`. Non-fatal: a failure to sweep only + * leaves garbage behind, it does not compromise correctness. */ -static int _bf_ctx_discover(void) +static void _bf_ctx_sweep_staging(void) { - _cleanup_close_ int bpffs_fd = -1; - _cleanup_close_ int pindir_fd = -1; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); _free_dir_ DIR *dir = NULL; struct dirent *entry; int iter_fd; int r; - bpffs_fd = bf_opendir(_bf_global_ctx->bpffs_path); - if (bpffs_fd < 0) { - return bf_err_r(bpffs_fd, "failed to open bpffs at %s", - _bf_global_ctx->bpffs_path); + r = bf_lock_init(&lock, BF_LOCK_WRITE); + if (r) { + bf_warn_r(r, "failed to lock pindir for staging sweep, skipping"); + return; } - pindir_fd = bf_opendir_at(bpffs_fd, "bpfilter", false); - if (pindir_fd < 0) { - if (pindir_fd == -ENOENT) { - bf_info("no bpfilter pin directory found, nothing to discover"); - return 0; - } - return bf_err_r(pindir_fd, "failed to open pin directory"); + iter_fd = dup(lock.pindir_fd); + if (iter_fd < 0) { + bf_warn_r(-errno, "failed to dup pindir fd for staging sweep"); + return; } - /* fdopendir() takes ownership of the fd: dup so pindir_fd remains valid - * for openat() calls inside the loop. */ - iter_fd = dup(pindir_fd); - if (iter_fd < 0) - return bf_err_r(-errno, "failed to dup pin directory fd"); - dir = fdopendir(iter_fd); if (!dir) { - r = -errno; close(iter_fd); - return bf_err_r(r, "failed to open pin directory for iteration"); + bf_warn_r(-errno, "failed to fdopendir pindir for staging sweep"); + return; } - for (;;) { - _free_bf_cgen_ struct bf_cgen *cgen = NULL; - _cleanup_close_ int chain_fd = -1; - - errno = 0; - entry = readdir(dir); - if (!entry) - break; + while ((entry = readdir(dir))) { + _cleanup_close_ int stage_fd = -1; - if (bf_streq(entry->d_name, ".") || bf_streq(entry->d_name, "..")) + if (!bf_strneq(entry->d_name, BF_LOCK_STAGING_PREFIX, + sizeof(BF_LOCK_STAGING_PREFIX) - 1)) continue; - if (entry->d_type != DT_DIR) + if (entry->d_type != DT_DIR && entry->d_type != DT_UNKNOWN) continue; - chain_fd = openat(pindir_fd, entry->d_name, O_DIRECTORY); - if (chain_fd < 0) { - bf_warn("failed to open chain directory '%s', skipping", - entry->d_name); + stage_fd = openat(lock.pindir_fd, entry->d_name, O_DIRECTORY); + if (stage_fd < 0) continue; - } - r = bf_cgen_new_from_dir_fd(&cgen, chain_fd); - if (r) { - bf_warn("failed to restore chain '%s', skipping", entry->d_name); + /* LOCK_NB: if the staging dir is still live, skip it. */ + if (flock(stage_fd, LOCK_EX | LOCK_NB) < 0) continue; - } - - bf_info("discovered chain '%s'", entry->d_name); - r = bf_list_push(&_bf_global_ctx->cgens, (void **)&cgen); - if (r) { - bf_warn("failed to add restored chain '%s' to context, skipping", - entry->d_name); + if (bf_rmdir_at(lock.pindir_fd, entry->d_name, true)) { + bf_warn("failed to sweep orphan staging dir '%s'", entry->d_name); + } else { + bf_dbg("removed left-over staging directory '%s'", entry->d_name); } } - - if (errno) - return bf_err_r(-errno, "failed to read pin directory"); - - return 0; } int bf_ctx_setup(bool with_bpf_token, const char *bpffs_path, uint16_t verbose) { - _cleanup_close_ int pindir_fd = -1; int r; r = _bf_ctx_new(&_bf_global_ctx, with_bpf_token, bpffs_path, verbose); if (r) return bf_err_r(r, "failed to create new context"); - pindir_fd = bf_ctx_get_pindir_fd(); - if (pindir_fd < 0) { - _bf_ctx_free(&_bf_global_ctx); - return bf_err_r(pindir_fd, "failed to get pin directory FD"); - } - - r = flock(pindir_fd, LOCK_EX | LOCK_NB); - if (r) { - _bf_ctx_free(&_bf_global_ctx); - return bf_err_r(-errno, "failed to lock pin directory"); - } - - r = _bf_ctx_discover(); - if (r) { - _bf_ctx_free(&_bf_global_ctx); - return bf_err_r(r, "failed to discover chains"); - } - - _bf_global_ctx->lock_fd = TAKE_FD(pindir_fd); + /* Reclaim any orphan staging directory left by a previous crash. */ + _bf_ctx_sweep_staging(); return 0; } @@ -417,26 +280,6 @@ void bf_ctx_teardown(void) _bf_ctx_free(&_bf_global_ctx); } -static void _bf_ctx_flush(struct bf_ctx *ctx) -{ - assert(ctx); - - bf_list_foreach (&ctx->cgens, cgen_node) { - struct bf_cgen *cgen = bf_list_node_get_data(cgen_node); - - bf_cgen_unload(cgen); - bf_list_delete(&ctx->cgens, cgen_node); - } -} - -void bf_ctx_flush(void) -{ - if (!_bf_global_ctx) - return; - - _bf_ctx_flush(_bf_global_ctx); -} - void bf_ctx_dump(prefix_t *prefix) { if (!_bf_global_ctx) @@ -445,67 +288,117 @@ void bf_ctx_dump(prefix_t *prefix) _bf_ctx_dump(_bf_global_ctx, prefix); } -struct bf_cgen *bf_ctx_get_cgen(const char *name) +int bf_ctx_get_cgen(struct bf_lock *lock, struct bf_cgen **cgen) { - if (!_bf_global_ctx) - return NULL; + _free_bf_cgen_ struct bf_cgen *_cgen = NULL; + int r; - return _bf_ctx_get_cgen(_bf_global_ctx, name); -} + assert(lock); + assert(cgen); -int bf_ctx_get_cgens(bf_list *cgens) -{ if (!_bf_global_ctx) return bf_err_r(-EINVAL, "context is not initialized"); - return _bf_ctx_get_cgens(_bf_global_ctx, cgens); -} + r = bf_cgen_new_from_dir_fd(&_cgen, lock); + if (r) + return bf_err_r(r, "failed to load chain '%s' from bpffs", + lock->chain_name ? lock->chain_name : "(unknown)"); -int bf_ctx_set_cgen(struct bf_cgen *cgen) -{ - if (!_bf_global_ctx) - return bf_err_r(-EINVAL, "context is not initialized"); + *cgen = TAKE_PTR(_cgen); - return _bf_ctx_set_cgen(_bf_global_ctx, cgen); + return 0; } -int bf_ctx_delete_cgen(struct bf_cgen *cgen, bool unload) +int bf_ctx_get_cgens(struct bf_lock *lock, bf_list **cgens) { + _free_bf_list_ bf_list *_cgens = NULL; + _free_dir_ DIR *dir = NULL; + struct dirent *entry; + int iter_fd; + int r; + + assert(lock); + assert(cgens); + if (!_bf_global_ctx) return bf_err_r(-EINVAL, "context is not initialized"); - return _bf_ctx_delete_cgen(_bf_global_ctx, cgen, unload); -} + r = bf_list_new(&_cgens, &bf_list_ops_default(bf_cgen_free, bf_cgen_pack)); + if (r) + return bf_err_r(r, "failed to allocate cgen list"); -int bf_ctx_token(void) -{ - if (!_bf_global_ctx) - return -1; + /* fdopendir() takes ownership of the fd: dup so lock->pindir_fd remains + * valid for further uses. */ + iter_fd = dup(lock->pindir_fd); + if (iter_fd < 0) + return bf_err_r(-errno, "failed to dup pin directory fd"); - return _bf_global_ctx->token_fd; -} + dir = fdopendir(iter_fd); + if (!dir) { + r = -errno; + close(iter_fd); + return bf_err_r(r, "failed to open pin directory for iteration"); + } -int bf_ctx_get_pindir_fd(void) -{ - _cleanup_close_ int bpffs_fd = -1; - _cleanup_close_ int pindir_fd = -1; + while (true) { + _free_bf_cgen_ struct bf_cgen *cgen = NULL; - if (!_bf_global_ctx) - return bf_err_r(-EINVAL, "context is not initialized"); + errno = 0; + entry = readdir(dir); + if (!entry && errno != 0) { + bf_warn_r(errno, "readdir failed, returning partial results"); + break; + } + if (!entry) + break; - bpffs_fd = bf_opendir(_bf_global_ctx->bpffs_path); - if (bpffs_fd < 0) { - return bf_err_r(bpffs_fd, "failed to open bpffs at %s", - _bf_global_ctx->bpffs_path); - } + if (bf_streq(entry->d_name, ".") || bf_streq(entry->d_name, "..")) + continue; + + if (entry->d_type != DT_DIR) + continue; + + /* Skip in-flight staging directories owned by concurrent writers. */ + if (bf_strneq(entry->d_name, BF_LOCK_STAGING_PREFIX, + sizeof(BF_LOCK_STAGING_PREFIX) - 1)) + continue; + + r = bf_lock_acquire_chain(lock, entry->d_name, BF_LOCK_READ, false); + if (r) { + bf_warn_r(r, "failed to acquire READ lock on chain '%s', skipping", + entry->d_name); + continue; + } + + r = bf_cgen_new_from_dir_fd(&cgen, lock); + if (r) { + bf_warn_r(r, "failed to restore chain '%s', skipping", + entry->d_name); + bf_lock_release_chain(lock); + continue; + } - pindir_fd = bf_opendir_at(bpffs_fd, "bpfilter", true); - if (pindir_fd < 0) { - return bf_err_r(pindir_fd, "failed to open pin directory %s/bpfilter", - _bf_global_ctx->bpffs_path); + bf_lock_release_chain(lock); + + r = bf_list_push(_cgens, (void **)&cgen); + if (r) { + bf_warn_r(r, "failed to push chain '%s' to list, skipping", + entry->d_name); + continue; + } } - return TAKE_FD(pindir_fd); + *cgens = TAKE_PTR(_cgens); + + return 0; +} + +int bf_ctx_token(void) +{ + if (!_bf_global_ctx) + return -1; + + return _bf_global_ctx->token_fd; } const struct bf_elfstub *bf_ctx_get_elfstub(enum bf_elfstub_id id) @@ -523,3 +416,11 @@ bool bf_ctx_is_verbose(enum bf_verbose opt) return _bf_global_ctx->verbose & BF_FLAG(opt); } + +const char *bf_ctx_get_bpffs_path(void) +{ + if (!_bf_global_ctx) + return NULL; + + return _bf_global_ctx->bpffs_path; +} diff --git a/src/libbpfilter/include/bpfilter/ctx.h b/src/libbpfilter/include/bpfilter/ctx.h index 1f92bfbaa..0f8ae26a8 100644 --- a/src/libbpfilter/include/bpfilter/ctx.h +++ b/src/libbpfilter/include/bpfilter/ctx.h @@ -21,11 +21,13 @@ * the main structure used to store the runtime context. * * All the public `bf_ctx_*` functions manipulate a private global context. - * Chain state is persisted in per-chain bpffs context maps and restored - * via `bf_ctx_setup` on restart. + * Chain state is persisted in per-chain bpffs context maps and loaded on + * demand by `bf_ctx_get_cgen()` and `bf_ctx_get_cgens()`; the global + * context does not cache them. */ struct bf_cgen; +struct bf_lock; enum bf_verbose { @@ -58,49 +60,42 @@ void bf_ctx_teardown(void); void bf_ctx_dump(prefix_t *prefix); /** - * @brief Unload and delete all the codegens. - */ -void bf_ctx_flush(void); - -/** - * Get a codegen from the global context. + * @brief Load a codegen from a chain pinned in bpffs. * - * @param name Name of the codegen to get. Can't be NULL. - * @return The requested codegen, or NULL if not found. - */ -struct bf_cgen *bf_ctx_get_cgen(const char *name); - -/** - * Get the list of all @ref bf_cgen in the context. - * - * The @p cgens list returned to the caller does not own the codegens, it can - * safely be cleaned up using @ref bf_list_clean or @ref bf_list_free . + * Reads and deserializes the persisted context map from the chain + * directory referenced by `lock` (the caller must have already acquired + * the chain via `bf_lock_acquire_chain`). On success `*cgen` is set to + * a newly-allocated codegen owned by the caller (use `_free_bf_cgen_`). * - * @param cgens List of @ref bf_cgen to fill. The list will be initialised by - * this function. Can't be NULL. On failure, @p cgens is left unchanged. - * @return 0 on success, or negative errno value on failure. + * @param lock Lock providing the chain directory file descriptor. Must + * hold a valid `chain_fd`. Can't be NULL. + * @param cgen Output pointer to the loaded codegen. Can't be NULL. Left + * unchanged on failure. + * @return 0 on success, or a negative errno value on failure. */ -int bf_ctx_get_cgens(bf_list *cgens); +int bf_ctx_get_cgen(struct bf_lock *lock, struct bf_cgen **cgen); /** - * Add a codegen to the global context. + * @brief Discover and load all codegens persisted under `{bpffs}/bpfilter/`. * - * @param cgen Codegen to add to the context. Can't be NULL. - * @return 0 on success, or a negative errno value on failure. If a chain - * already exist in the context with the same name, the codegen is not - * added to the context and `-EEXIST` is returned. - */ -int bf_ctx_set_cgen(struct bf_cgen *cgen); - -/** - * Delete a codegen from the context. + * The function allocates a new heap list with owning ops + * (`bf_cgen_free`/`bf_cgen_pack`); on success the caller owns it and + * must release it with `bf_list_free` (commonly via `_free_bf_list_`). + * For each chain entry it acquires a `BF_LOCK_READ` chain lock via + * `bf_lock_acquire_chain` for the duration of the deserialise step, + * then releases it before moving to the next chain. Per-entry failures + * are logged and the offending chain is skipped. * - * @param cgen Codegen to delete from the context. The codegen will be freed. - * Can't be NULL. - * @param unload Unload the codegen from the system before deleting it. - * @return 0 on success, or negative errno value on failure. + * @param lock Lock that must already hold the pin directory locked + * (e.g. via `bf_lock_init(BF_LOCK_READ)` or `BF_LOCK_WRITE`). + * Must not currently hold a chain lock. Can't be NULL. + * @param cgens Output pointer to the allocated list. Can't be NULL. + * Must point to a `NULL` `bf_list *` on entry. Left unchanged on + * failure. + * @return 0 on success, or a negative errno value on setup failure + * (cannot open pin directory, allocation failure). */ -int bf_ctx_delete_cgen(struct bf_cgen *cgen, bool unload); +int bf_ctx_get_cgens(struct bf_lock *lock, bf_list **cgens); /** * Get the BPF token file descriptor. @@ -109,14 +104,6 @@ int bf_ctx_delete_cgen(struct bf_cgen *cgen, bool unload); */ int bf_ctx_token(void); -/** - * @brief Return a file descriptor to bpfilter's pin directory. - * - * @return File descriptor to bpfilter's pin directory, or a negative errno - * value on failure. - */ -int bf_ctx_get_pindir_fd(void); - /** * @brief Get a ELF stub from its ID. * @@ -129,3 +116,8 @@ const struct bf_elfstub *bf_ctx_get_elfstub(enum bf_elfstub_id id); * @return true if the given verbose flag is set. */ bool bf_ctx_is_verbose(enum bf_verbose opt); + +/** + * @return Path to the configured BPF filesystem. + */ +const char *bf_ctx_get_bpffs_path(void); diff --git a/src/libbpfilter/include/bpfilter/helper.h b/src/libbpfilter/include/bpfilter/helper.h index c5899b1cc..a68bdb4ca 100644 --- a/src/libbpfilter/include/bpfilter/helper.h +++ b/src/libbpfilter/include/bpfilter/helper.h @@ -345,7 +345,7 @@ static inline bool bf_strneq(const char *lhs, const char *rhs, size_t n) if (!lhs || !rhs) return false; - return strncmp(lhs, rhs, n); + return strncmp(lhs, rhs, n) == 0; } /** diff --git a/tests/harness/test.c b/tests/harness/test.c index 8989fa22f..b45801ac3 100644 --- a/tests/harness/test.c +++ b/tests/harness/test.c @@ -312,3 +312,33 @@ bool bft_matcher_equal(const struct bf_matcher *matcher0, bf_matcher_payload(matcher1), bf_matcher_payload_len(matcher0)); } + +int bft_setup_ctx(void **state) +{ + _free_bft_tmpdir_ struct bft_tmpdir *tmpdir = NULL; + int r; + + r = bft_tmpdir_new(&tmpdir); + if (r) + return r; + + r = bf_ctx_setup(false, tmpdir->dir_path, BF_FLAG(BF_VERBOSE_DEBUG)); + if (r) + return r; + + *state = TAKE_PTR(tmpdir); + + return 0; +} + +int bft_teardown_ctx(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + + bf_ctx_teardown(); + bft_tmpdir_free(&tmpdir); + + *state = NULL; + + return 0; +} diff --git a/tests/harness/test.h b/tests/harness/test.h index 398f0448c..8121c5554 100644 --- a/tests/harness/test.h +++ b/tests/harness/test.h @@ -14,7 +14,9 @@ // clang-format on #include +#include +#include #include #include @@ -32,6 +34,27 @@ struct bf_counter; #define assert_int_lt(expr, ref) assert_true((expr) < (ref)) #define assert_int_lte(expr, ref) assert_true((expr) <= (ref)) +#define assert_dir_exists(tmpdir, name) \ + do { \ + char _path[PATH_MAX]; \ + struct stat _st; \ + \ + (void)snprintf(_path, sizeof(_path), "%s/%s", (tmpdir)->dir_path, \ + (name)); \ + assert_int_equal(stat(_path, &_st), 0); \ + assert_true(S_ISDIR(_st.st_mode)); \ + } while (0) + +#define assert_dir_not_exists(tmpdir, name) \ + do { \ + char _path[PATH_MAX]; \ + struct stat _st; \ + \ + (void)snprintf(_path, sizeof(_path), "%s/%s", (tmpdir)->dir_path, \ + (name)); \ + assert_int_not_equal(stat(_path, &_st), 0); \ + } while (0) + #define assert_enum_to_str(type, to_str, first, max) \ do { \ for (type _start = first; _start < max; ++_start) \ @@ -63,6 +86,7 @@ struct bf_counter; 0); \ } while (0) #define assert_fd_empty(fd) assert_int_equal(fd, -1) +#define assert_fd(fd) assert_int_gte(fd, 0) #define assert_rule_equal(rule0, rule1) \ do { \ @@ -150,6 +174,8 @@ struct bft_tmpdir char *dir_path; }; +#define _free_bft_tmpdir_ __attribute__((cleanup(bft_tmpdir_free))) + int bft_tmpdir_new(struct bft_tmpdir **tmpdir); void bft_tmpdir_free(struct bft_tmpdir **tmpdir); @@ -179,3 +205,32 @@ int bft_hook_drop(enum bf_hook hook); * @return The flavor-specific next verdict. */ int bft_hook_next(enum bf_hook hook); + +/** + * @brief Initialize the global test `bf_ctx`. + * + * @pre + * - `state` is not NULL. + * @post + * - On success: `*state` is a pointer to a valid heap-allocated `bft_tmpdir` + * object, and the global `bf_ctx` has been initalized with the temporary + * directory. + * - On failure: `*state` is unchanged. + * + * @param state Pointer to a custom object, provided by CMocka. + * @return 0 on success, or a negative errno value on failure. + */ +int bft_setup_ctx(void **state); + +/** + * @brief Cleanup the global test `bf_ctx`. + * + * @pre + * - `state` is not NULL, `*state` points to a valid `bft_tmpdir`. + * @post + * - The `bft_tmpdir` has been deallocated, `*state` is NULL. + * + * @param state Pointer to a custom test object, provided by CMocka. + * @return 0 on success, or a negative errno value on failure. + */ +int bft_teardown_ctx(void **state); diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 66c48e1f5..1441eb284 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -39,7 +39,11 @@ function(bf_add_c_test GROUP SOURCE) ) add_dependencies(unit_bin ${target_name}) - target_include_directories(${target_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + target_include_directories(${target_name} + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/src/libbpfilter + ) add_test(NAME ${target_name} COMMAND $ @@ -76,6 +80,7 @@ bf_add_c_test(unit libbpfilter/btf.c) bf_add_c_test(unit libbpfilter/chain.c) bf_add_c_test(unit libbpfilter/cli.c) bf_add_c_test(unit libbpfilter/counter.c) +bf_add_c_test(unit libbpfilter/ctx.c) bf_add_c_test(unit libbpfilter/dump.c) bf_add_c_test(unit libbpfilter/flavor.c) bf_add_c_test(unit libbpfilter/helper.c) @@ -84,6 +89,7 @@ bf_add_c_test(unit libbpfilter/if.c) bf_add_c_test(unit libbpfilter/io.c) bf_add_c_test(unit libbpfilter/core/hashset.c) bf_add_c_test(unit libbpfilter/core/list.c) +bf_add_c_test(unit libbpfilter/core/lock.c) bf_add_c_test(unit libbpfilter/core/vector.c) bf_add_c_test(unit libbpfilter/logger.c) bf_add_c_test(unit libbpfilter/matcher.c) diff --git a/tests/unit/libbpfilter/cli.c b/tests/unit/libbpfilter/cli.c index 64055dd23..92e8884d3 100644 --- a/tests/unit/libbpfilter/cli.c +++ b/tests/unit/libbpfilter/cli.c @@ -3,53 +3,191 @@ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include + #include +#include +#include #include #include #include #include +#include #include +#include #include "fake.h" #include "test.h" -static void ruleset_set(void **state) +static void ruleset_get(void **state) { (void)state; + { + // Empty ruleset + _clean_bf_list_ bf_list chains = bf_list_default(NULL, NULL); + _clean_bf_list_ bf_list hookopts = bf_list_default(NULL, NULL); + _clean_bf_list_ bf_list counters = bf_list_default(NULL, NULL); + + assert_ok(bf_ruleset_get(&chains, &hookopts, &counters)); + assert_int_equal(bf_list_size(&chains), 0); + assert_int_equal(bf_list_size(&hookopts), 0); + assert_int_equal(bf_list_size(&counters), 0); + } + + { + // Skip corrupt chains + struct bft_tmpdir *tmpdir = *state; + _clean_bf_list_ bf_list chains = bf_list_default(NULL, NULL); + _clean_bf_list_ bf_list hookopts = bf_list_default(NULL, NULL); + _clean_bf_list_ bf_list counters = bf_list_default(NULL, NULL); + char path[PATH_MAX]; + + /* A chain dir with no `bf_ctx` map looks valid to readdir() but cannot be + * deserialized; bf_ruleset_get must surface that as a successful empty + * walk, not a hard error. */ + (void)snprintf(path, sizeof(path), "%s/bpfilter", tmpdir->dir_path); + (void)mkdir(path, 0755); + + (void)snprintf(path, sizeof(path), "%s/bpfilter/orphan", + tmpdir->dir_path); + assert_ok(mkdir(path, 0755)); + + assert_ok(bf_ruleset_get(&chains, &hookopts, &counters)); + assert_int_equal(bf_list_size(&chains), 0); + assert_int_equal(bf_list_size(&hookopts), 0); + assert_int_equal(bf_list_size(&counters), 0); + } +} + +static void ruleset_set(void **state) +{ _clean_bf_list_ bf_list chains = bf_list_default(bf_chain_free, bf_chain_pack); _clean_bf_list_ bf_list hookopts = bf_list_default(bf_hookopts_free, bf_hookopts_pack); + (void)state; + // Mismatched list sizes should fail assert_ok(bf_list_add_tail(&chains, bft_chain_dummy(false))); - assert_int_equal(bf_ruleset_set(&chains, &hookopts), -EINVAL); + assert_err(bf_ruleset_set(&chains, &hookopts)); +} + +static void ruleset_flush(void **state) +{ + (void)state; + + // Empty ruleset + assert_ok(bf_ruleset_flush()); +} + +static void chain_set(void **state) +{ + _free_bf_chain_ struct bf_chain *chain = bft_chain_dummy(false); + + (void)state; + + assert_err(bf_chain_set(chain, NULL)); +} + +static void chain_get(void **state) +{ + _free_bf_chain_ struct bf_chain *chain = NULL; + _free_bf_hookopts_ struct bf_hookopts *hookopts = NULL; + _clean_bf_list_ struct bf_list counters = bf_list_default(NULL, NULL); + + (void)state; + + assert_err(bf_chain_get("invalid_chain", &chain, &hookopts, &counters)); } static void chain_prog_fd(void **state) { (void)state; - // NULL name should fail - assert_int_equal(bf_chain_prog_fd(NULL), -EINVAL); + assert_err(bf_chain_prog_fd("invalid_chain")); } static void chain_logs_fd(void **state) { (void)state; - // NULL name should fail - assert_int_equal(bf_chain_logs_fd(NULL), -EINVAL); + assert_err(bf_chain_logs_fd("invalid_chain")); +} + +static void chain_load(void **state) +{ + _free_bf_chain_ struct bf_chain *chain = bft_chain_dummy(true); + + (void)state; + + assert_err(bf_chain_load(chain)); +} + +static void chain_attach(void **state) +{ + struct bf_hookopts hookopts = {}; + + (void)state; + + assert_err(bf_chain_attach("invalid_chain", &hookopts)); +} + +static void chain_update(void **state) +{ + _free_bf_chain_ struct bf_chain *chain = bft_chain_dummy(true); + + (void)state; + + assert_err(bf_chain_update(chain)); +} + +static void chain_update_set(void **state) +{ + _free_bf_set_ struct bf_set *set0 = bft_set_dummy(3); + _free_bf_set_ struct bf_set *set1 = bft_set_dummy(2); + + (void)state; + + assert_err(bf_chain_update_set("invalid_name", set0, set1)); +} + +static void chain_flush(void **state) +{ + (void)state; + + assert_err(bf_chain_flush("invalid_chain")); } int main(void) { const struct CMUnitTest tests[] = { - cmocka_unit_test(ruleset_set), - cmocka_unit_test(chain_prog_fd), - cmocka_unit_test(chain_logs_fd), + cmocka_unit_test_setup_teardown(ruleset_get, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(ruleset_set, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(ruleset_flush, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_set, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_get, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_prog_fd, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_logs_fd, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_load, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_attach, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_update, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_update_set, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_flush, bft_setup_ctx, + bft_teardown_ctx), }; return cmocka_run_group_tests(tests, NULL, NULL); diff --git a/tests/unit/libbpfilter/core/lock.c b/tests/unit/libbpfilter/core/lock.c new file mode 100644 index 000000000..cf446e9b0 --- /dev/null +++ b/tests/unit/libbpfilter/core/lock.c @@ -0,0 +1,649 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +#include "core/lock.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "test.h" + +/* ------------------------------------------------------------------ + * bf_lock_default() + * ------------------------------------------------------------------ */ + +/* Post-condition of bf_lock_default(): all fds == -1, all locks == NONE, + * chain_name == NULL. */ +static void default_values(void **state) +{ + struct bf_lock lock = bf_lock_default(); + + (void)state; + + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + assert_int_equal(lock.pindir_lock, BF_LOCK_NONE); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); +} + +/* ------------------------------------------------------------------ + * bf_lock_init() + * ------------------------------------------------------------------ */ + +/* Success post: bpffs_fd and pindir_fd are valid, pindir_lock == mode. */ +static void init_success_post_state(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + assert_fd(lock.bpffs_fd); + assert_fd(lock.pindir_fd); + assert_int_equal(lock.pindir_lock, BF_LOCK_READ); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); + + /* The pindir is created lazily. */ + assert_dir_exists(tmpdir, "bpfilter"); +} + +/* Failure post: on failure, `lock` is unchanged. */ +static void init_failure_preserves_lock(void **state) +{ + _clean_bf_lock_ struct bf_lock holder = bf_lock_default(); + struct bf_lock lock = bf_lock_default(); + + (void)state; + + /* Hold WRITE on the pindir so a second WRITE init will fail. */ + assert_ok(bf_lock_init(&holder, BF_LOCK_WRITE)); + + assert_err(bf_lock_init(&lock, BF_LOCK_WRITE)); + + /* lock is unchanged (still in default state). */ + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + assert_int_equal(lock.pindir_lock, BF_LOCK_NONE); +} + +/* ------------------------------------------------------------------ + * flock matrix: WRITE / READ / NONE compatibility + * ------------------------------------------------------------------ */ + +static void pindir_lock_matrix(void **state) +{ + (void)state; + + { + // WRITE exclude other WRITE and READ + _clean_bf_lock_ struct bf_lock lock1 = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock2 = bf_lock_default(); + + assert_ok(bf_lock_init(&lock1, BF_LOCK_WRITE)); + assert_err(bf_lock_init(&lock2, BF_LOCK_WRITE)); + assert_err(bf_lock_init(&lock2, BF_LOCK_READ)); + assert_ok(bf_lock_init(&lock2, BF_LOCK_NONE)); + } + + { + // READ exclude WRITE + _clean_bf_lock_ struct bf_lock lock1 = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock2 = bf_lock_default(); + + assert_ok(bf_lock_init(&lock1, BF_LOCK_READ)); + assert_err(bf_lock_init(&lock2, BF_LOCK_WRITE)); + assert_ok(bf_lock_init(&lock2, BF_LOCK_READ)); + assert_ok(bf_lock_init(&lock2, BF_LOCK_NONE)); + } + + { + // NONE allows all + _clean_bf_lock_ struct bf_lock lock1 = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock2 = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock3 = bf_lock_default(); + + assert_ok(bf_lock_init(&lock1, BF_LOCK_NONE)); + + assert_ok(bf_lock_init(&lock2, BF_LOCK_WRITE)); + bf_lock_cleanup(&lock2); + + assert_ok(bf_lock_init(&lock2, BF_LOCK_READ)); + bf_lock_cleanup(&lock2); + } +} + +/* After cleanup, the pindir is still present on disk (never rmdir'd by + * the library). Multiple init/cleanup cycles never remove the pindir. */ +static void pindir_survives_repeated_cycles(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + + for (int i = 0; i < 5; ++i) { + struct bf_lock lock = bf_lock_default(); + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + bf_lock_cleanup(&lock); + assert_dir_exists(tmpdir, "bpfilter"); + } +} + +/* ------------------------------------------------------------------ + * bf_lock_acquire_chain() + * ------------------------------------------------------------------ */ + +/* Invalid lock (default state) => -EBADFD; lock unchanged. */ +static void acquire_chain_uninitialized_rejects(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + struct bf_lock lock = bf_lock_default(); + + // Pindir not locked, can't acquire a chain + assert_err(bf_lock_acquire_chain(&lock, "c", BF_LOCK_READ, false)); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); + assert_dir_not_exists(tmpdir, "bpfilter/c"); +} + +/* Can't lock a chain twice */ +static void acquire_chain_double_rejects(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + struct bft_tmpdir *tmpdir = *state; + + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&lock, "first", BF_LOCK_WRITE, true)); + assert_dir_exists(tmpdir, "bpfilter/first"); + + // Try to acquire the same chain again + assert_err(bf_lock_acquire_chain(&lock, "first", BF_LOCK_WRITE, true)); + + // Try to acquire another chain + assert_err(bf_lock_acquire_chain(&lock, "second", BF_LOCK_WRITE, true)); + assert_dir_not_exists(tmpdir, "bpfilter/second"); +} + +/* Chain dir is created when WRITE is used */ +static void acquire_chain_create(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + struct bft_tmpdir *tmpdir = *state; + + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + + // Acquire with READ and create=true fails + assert_err(bf_lock_acquire_chain(&lock, "c", BF_LOCK_READ, true)); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_dir_not_exists(tmpdir, "bpfilter/c"); + + // Acquire with WRITE and create=true (empty chain dir is removed) + assert_ok(bf_lock_acquire_chain(&lock, "c", BF_LOCK_WRITE, true)); + assert_fd(lock.chain_fd); + assert_string_equal(lock.chain_name, "c"); + assert_dir_exists(tmpdir, "bpfilter/c"); + + bf_lock_release_chain(&lock); + assert_dir_not_exists(tmpdir, "bpfilter/c"); +} + +/* Can't lock a non-existing chain without create=true */ +static void acquire_chain_missing_fails(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + struct bft_tmpdir *tmpdir = *state; + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + assert_err(bf_lock_acquire_chain(&lock, "absent", BF_LOCK_READ, false)); + assert_dir_not_exists(tmpdir, "bpfilter/absent"); +} + +/* ------------------------------------------------------------------ + * Chain-level flock matrix + * ------------------------------------------------------------------ */ + +/* Two READ chain locks on the same chain are compatible. */ +static void chain_read_compatible_read(void **state) +{ + _clean_bf_lock_ struct bf_lock lock1 = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock2 = bf_lock_default(); + + (void)state; + + { + _clean_bf_lock_ struct bf_lock prep = bf_lock_default(); + assert_ok(bf_lock_init(&prep, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&prep, "shared", BF_LOCK_WRITE, true)); + (void)mknodat(prep.chain_fd, "keepalive", S_IFREG | 0644, 0); + } + + assert_ok(bf_lock_init_for_chain(&lock1, "shared", BF_LOCK_READ, + BF_LOCK_READ, false)); + assert_ok(bf_lock_init_for_chain(&lock2, "shared", BF_LOCK_READ, + BF_LOCK_READ, false)); +} + +/* Two locks on DIFFERENT chains are compatible, even when both hold WRITE. + * `bf_ruleset_set` relies on this: it iterates per-chain WRITE locks while + * holding the pindir WRITE, and each chain's flock must not contend with + * the others. */ +static void chain_lock_isolates_per_chain(void **state) +{ + _clean_bf_lock_ struct bf_lock lock_a = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock_b = bf_lock_default(); + + (void)state; + + /* Materialise both chains so they can be opened with create=false. */ + { + _clean_bf_lock_ struct bf_lock prep = bf_lock_default(); + assert_ok(bf_lock_init(&prep, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&prep, "alpha", BF_LOCK_WRITE, true)); + (void)mknodat(prep.chain_fd, "keepalive", S_IFREG | 0644, 0); + + bf_lock_release_chain(&prep); + assert_ok(bf_lock_acquire_chain(&prep, "beta", BF_LOCK_WRITE, true)); + (void)mknodat(prep.chain_fd, "keepalive", S_IFREG | 0644, 0); + } + + assert_ok(bf_lock_init_for_chain(&lock_a, "alpha", BF_LOCK_READ, + BF_LOCK_WRITE, false)); + assert_string_equal(lock_a.chain_name, "alpha"); + assert_int_equal(lock_a.chain_lock, BF_LOCK_WRITE); + + /* Independent chain: WRITE on "beta" must not contend with WRITE on + * "alpha". */ + assert_ok(bf_lock_init_for_chain(&lock_b, "beta", BF_LOCK_READ, + BF_LOCK_WRITE, false)); + assert_string_equal(lock_b.chain_name, "beta"); + assert_int_equal(lock_b.chain_lock, BF_LOCK_WRITE); +} + +/* ------------------------------------------------------------------ + * bf_lock_release_chain() post-conditions (I2) + * ------------------------------------------------------------------ */ + +/* `bf_lock_release_chain` is idempotent: calling it twice in a row is a + * silent no-op for the second call. Calling it on a default-initialised + * lock (no fds at all) is also a no-op (modulo a warning). The caller + * relies on this for `_clean_bf_lock_` correctness when an early failure + * leaves the lock partially populated. */ +static void release_chain_idempotent(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + /* Default lock: silent no-op, fields stay defaulted. */ + bf_lock_release_chain(&lock); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); + + /* Initialised lock with no chain held: still a no-op, twice. */ + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + bf_lock_release_chain(&lock); + bf_lock_release_chain(&lock); + assert_fd(lock.pindir_fd); + assert_int_equal(lock.pindir_lock, BF_LOCK_WRITE); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); + + /* Acquire + release + release again: the second release must be a + * no-op and not double-close the chain fd or double-free chain_name. */ + assert_ok(bf_lock_acquire_chain(&lock, "idempotent", BF_LOCK_WRITE, true)); + bf_lock_release_chain(&lock); + bf_lock_release_chain(&lock); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); +} + +/* Acquire then release then re-acquire on the same `bf_lock`: the second + * acquire must succeed and fully populate the chain fields. */ +static void release_then_reacquire(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + + assert_ok(bf_lock_acquire_chain(&lock, "first", BF_LOCK_WRITE, true)); + assert_string_equal(lock.chain_name, "first"); + assert_fd(lock.chain_fd); + assert_int_equal(lock.chain_lock, BF_LOCK_WRITE); + + bf_lock_release_chain(&lock); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); + + /* Re-acquire on the same lock: must succeed and update all chain + * fields, including the new name. */ + assert_ok(bf_lock_acquire_chain(&lock, "second", BF_LOCK_WRITE, true)); + assert_string_equal(lock.chain_name, "second"); + assert_fd(lock.chain_fd); + assert_int_equal(lock.chain_lock, BF_LOCK_WRITE); + + /* The pindir lock must be unchanged across the release/reacquire. */ + assert_int_equal(lock.pindir_lock, BF_LOCK_WRITE); +} + +/* `BF_LOCK_NONE` on the chain side opens the chain dir without taking an + * `flock`. The chain dir must be observable as locked-with-NONE in the + * post-state, and `bf_lock_release_chain` must NOT try to remove it (the + * removal gate is `chain_lock == BF_LOCK_WRITE`). */ +static void chain_lock_none_acquire_and_release(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + /* Pre-create the chain dir; we'll open it with chain mode == NONE. */ + { + _clean_bf_lock_ struct bf_lock prep = bf_lock_default(); + assert_ok(bf_lock_init(&prep, BF_LOCK_WRITE)); + assert_ok( + bf_lock_acquire_chain(&prep, "none_chain", BF_LOCK_WRITE, true)); + (void)mknodat(prep.chain_fd, "keepalive", S_IFREG | 0644, 0); + } + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + assert_ok(bf_lock_acquire_chain(&lock, "none_chain", BF_LOCK_NONE, false)); + + /* Because chain_lock != WRITE, release must NOT remove the chain dir, + * even if it were empty. */ + bf_lock_release_chain(&lock); + assert_dir_exists(tmpdir, "bpfilter/none_chain"); +} + +/* No-op on a lock with no chain held. */ +static void release_no_chain_is_noop(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + bf_lock_release_chain(&lock); + + /* Pindir state untouched. */ + assert_fd(lock.pindir_fd); + assert_int_equal(lock.pindir_lock, BF_LOCK_READ); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); +} + +/* WRITE release removes an empty chain dir; the chain fields are reset. */ +static void release_write_removes_empty_chain(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&lock, "empty", BF_LOCK_WRITE, true)); + assert_dir_exists(tmpdir, "bpfilter/empty"); + + bf_lock_release_chain(&lock); + + /* Empty chain dir was removed. */ + assert_dir_not_exists(tmpdir, "bpfilter/empty"); + /* Chain fields reset. */ + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); + /* Pindir state still valid. */ + assert_fd(lock.pindir_fd); + assert_int_equal(lock.pindir_lock, BF_LOCK_WRITE); +} + +/* WRITE release does NOT remove a non-empty chain dir. */ +static void release_write_keeps_nonempty_chain(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&lock, "populated", BF_LOCK_WRITE, true)); + /* Populate so rmdir returns ENOTEMPTY and no-ops. */ + (void)mknodat(lock.chain_fd, "inside", S_IFREG | 0644, 0); + + bf_lock_release_chain(&lock); + + assert_dir_exists(tmpdir, "bpfilter/populated"); +} + +/* READ release does NOT remove the chain dir (I2). */ +static void release_read_keeps_chain(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + /* Create and populate so an accidental rmdir would fail silently (we + * really want to know the *code path* isn't even trying). We still + * observe via the "chain stays" invariant. */ + { + _clean_bf_lock_ struct bf_lock prep = bf_lock_default(); + assert_ok(bf_lock_init(&prep, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&prep, "reader", BF_LOCK_WRITE, true)); + (void)mknodat(prep.chain_fd, "keepalive", S_IFREG | 0644, 0); + } + + assert_ok(bf_lock_init_for_chain(&lock, "reader", BF_LOCK_READ, + BF_LOCK_READ, false)); + + bf_lock_release_chain(&lock); + assert_dir_exists(tmpdir, "bpfilter/reader"); +} + +/* ------------------------------------------------------------------ + * bf_lock_cleanup() + * ------------------------------------------------------------------ */ + +/* Cleanup is idempotent and safe on a default lock. */ +static void cleanup_idempotent(void **state) +{ + struct bf_lock lock = bf_lock_default(); + + (void)state; + + bf_lock_cleanup(&lock); + bf_lock_cleanup(&lock); + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + bf_lock_cleanup(&lock); + bf_lock_cleanup(&lock); + + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + assert_int_equal(lock.pindir_lock, BF_LOCK_NONE); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); + assert_int_equal(lock.chain_lock, BF_LOCK_NONE); +} + +/* Cleanup releases the pindir lock so another caller can immediately + * acquire it. */ +static void cleanup_releases_pindir_lock(void **state) +{ + struct bf_lock lock1 = bf_lock_default(); + _clean_bf_lock_ struct bf_lock lock2 = bf_lock_default(); + + (void)state; + + assert_ok(bf_lock_init(&lock1, BF_LOCK_WRITE)); + bf_lock_cleanup(&lock1); + + assert_ok(bf_lock_init(&lock2, BF_LOCK_WRITE)); +} + +/* Cleanup on a lock holding a chain WRITE lock releases both locks and + * removes the empty chain dir; the pindir itself is kept (I1). */ +static void cleanup_with_chain_lock(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + struct bf_lock lock = bf_lock_default(); + + assert_ok(bf_lock_init(&lock, BF_LOCK_WRITE)); + assert_ok(bf_lock_acquire_chain(&lock, "c", BF_LOCK_WRITE, true)); + assert_dir_exists(tmpdir, "bpfilter/c"); + + bf_lock_cleanup(&lock); + + /* Chain dir removed (empty), pindir kept (I1). */ + assert_dir_not_exists(tmpdir, "bpfilter/c"); + assert_dir_exists(tmpdir, "bpfilter"); + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + assert_int_equal(lock.chain_fd, -1); +} + +/* ------------------------------------------------------------------ + * bf_lock_init_for_chain() + * ------------------------------------------------------------------ */ + +/* Reject create=true with pindir_mode != WRITE. */ +static void init_for_chain_create_needs_write_pindir(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + assert_int_equal( + bf_lock_init_for_chain(&lock, "c", BF_LOCK_READ, BF_LOCK_WRITE, true), + -EINVAL); + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); +} + +/* Reject create=true with chain mode != WRITE. */ +static void init_for_chain_create_needs_write_chain(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + assert_int_equal( + bf_lock_init_for_chain(&lock, "c", BF_LOCK_WRITE, BF_LOCK_READ, true), + -EINVAL); + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); +} + +/* Success: locks both dirs with the requested modes. */ +static void init_for_chain_success(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + assert_ok(bf_lock_init_for_chain(&lock, "both", BF_LOCK_WRITE, + BF_LOCK_WRITE, true)); + assert_fd(lock.bpffs_fd); + assert_fd(lock.pindir_fd); + assert_int_equal(lock.pindir_lock, BF_LOCK_WRITE); + assert_fd(lock.chain_fd); + assert_string_equal(lock.chain_name, "both"); + assert_int_equal(lock.chain_lock, BF_LOCK_WRITE); +} + +/* Failure preserves the lock in default state. */ +static void init_for_chain_failure_preserves_lock(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + /* Missing chain, create=false. */ + assert_int_equal(bf_lock_init_for_chain(&lock, "absent", BF_LOCK_READ, + BF_LOCK_READ, false), + -ENOENT); + assert_int_equal(lock.bpffs_fd, -1); + assert_int_equal(lock.pindir_fd, -1); + assert_int_equal(lock.chain_fd, -1); + assert_null(lock.chain_name); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(default_values), + + cmocka_unit_test_setup_teardown(init_success_post_state, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(init_failure_preserves_lock, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(pindir_lock_matrix, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(pindir_survives_repeated_cycles, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(acquire_chain_uninitialized_rejects, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(acquire_chain_double_rejects, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_read_compatible_read, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_lock_isolates_per_chain, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(chain_lock_none_acquire_and_release, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(release_chain_idempotent, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(release_then_reacquire, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(release_no_chain_is_noop, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(release_write_removes_empty_chain, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(release_write_keeps_nonempty_chain, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(release_read_keeps_chain, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(cleanup_idempotent, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(cleanup_releases_pindir_lock, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(cleanup_with_chain_lock, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown( + init_for_chain_create_needs_write_pindir, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(init_for_chain_create_needs_write_chain, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(init_for_chain_success, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(init_for_chain_failure_preserves_lock, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(acquire_chain_missing_fails, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(acquire_chain_create, bft_setup_ctx, + bft_teardown_ctx), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/unit/libbpfilter/ctx.c b/tests/unit/libbpfilter/ctx.c new file mode 100644 index 000000000..323fc3a2b --- /dev/null +++ b/tests/unit/libbpfilter/ctx.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "core/lock.h" +#include "test.h" + +static void no_setup(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + + (void)state; + + /* All ctx accessors must reject calls made before bf_ctx_setup. */ + assert_int_equal(bf_lock_init(&lock, BF_LOCK_READ), -EINVAL); + assert_int_equal(bf_ctx_token(), -1); + assert_null(bf_ctx_get_elfstub(0)); + assert_false(bf_ctx_is_verbose(BF_VERBOSE_DEBUG)); +} + +static void get_cgens_empty(void **state) +{ + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_list_ bf_list *cgens = NULL; + + (void)state; + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + assert_ok(bf_ctx_get_cgens(&lock, &cgens)); + assert_non_null(cgens); + assert_int_equal(bf_list_size(cgens), 0); +} + +static void get_cgens_skips_corrupt(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + _free_bf_list_ bf_list *cgens = NULL; + char pindir_path[PATH_MAX]; + char dir_path[PATH_MAX]; + char file_path[PATH_MAX]; + _cleanup_close_ int fd = -1; + + /* The pin directory is now created unconditionally by `bf_ctx_setup` + * (I1); just make sure it exists, don't treat an existing one as an + * error. */ + (void)snprintf(pindir_path, sizeof(pindir_path), "%s/bpfilter", + tmpdir->dir_path); + (void)mkdir(pindir_path, 0755); + + /* Create a chain dir without a `bf_ctx` map: it should be warn-and-skipped + * during discovery. */ + (void)snprintf(dir_path, sizeof(dir_path), "%s/bpfilter/orphan_chain", + tmpdir->dir_path); + assert_ok(mkdir(dir_path, 0755)); + + /* Drop a regular file alongside the chain dirs to verify the !DT_DIR + * branch is skipped. */ + (void)snprintf(file_path, sizeof(file_path), "%s/bpfilter/stray_file", + tmpdir->dir_path); + assert_fd(fd = open(file_path, O_CREAT | O_WRONLY, 0644)); + + assert_ok(bf_lock_init(&lock, BF_LOCK_READ)); + assert_ok(bf_ctx_get_cgens(&lock, &cgens)); + assert_non_null(cgens); + assert_int_equal(bf_list_size(cgens), 0); +} + +static void get_cgen_corrupt_returns_error(void **state) +{ + struct bft_tmpdir *tmpdir = *state; + _clean_bf_lock_ struct bf_lock lock = bf_lock_default(); + struct bf_cgen *cgen = NULL; + char pindir_path[PATH_MAX]; + char dir_path[PATH_MAX]; + + (void)snprintf(pindir_path, sizeof(pindir_path), "%s/bpfilter", + tmpdir->dir_path); + (void)mkdir(pindir_path, 0755); + + (void)snprintf(dir_path, sizeof(dir_path), "%s/bpfilter/broken", + tmpdir->dir_path); + assert_ok(mkdir(dir_path, 0755)); + + /* Chain dir exists but the `bf_ctx` map is missing: bf_cgen_new_from_dir_fd + * fails, and the error must be propagated (not swallowed as -ENOENT). */ + assert_ok(bf_lock_init_for_chain(&lock, "broken", BF_LOCK_READ, + BF_LOCK_READ, false)); + assert_err(bf_ctx_get_cgen(&lock, &cgen)); + assert_null(cgen); +} + +static void verbose_flags(void **state) +{ + (void)state; + + assert_true(bf_ctx_is_verbose(BF_VERBOSE_DEBUG)); + assert_false(bf_ctx_is_verbose(BF_VERBOSE_BPF)); + assert_false(bf_ctx_is_verbose(BF_VERBOSE_BYTECODE)); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(no_setup), + cmocka_unit_test_setup_teardown(get_cgens_empty, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(get_cgens_skips_corrupt, bft_setup_ctx, + bft_teardown_ctx), + cmocka_unit_test_setup_teardown(get_cgen_corrupt_returns_error, + bft_setup_ctx, bft_teardown_ctx), + cmocka_unit_test_setup_teardown(verbose_flags, bft_setup_ctx, + bft_teardown_ctx), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +}