From bc38307ad085c9dc9f9d885c7bf84cf61cfdc1e5 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Thu, 13 Nov 2025 10:29:51 -0800 Subject: [PATCH 1/2] bug: Fix lockup when RLIMIT_NOFILE is large --- Sources/libreprl/libreprl-posix.c | 119 ++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 7 deletions(-) diff --git a/Sources/libreprl/libreprl-posix.c b/Sources/libreprl/libreprl-posix.c index 34905d91d..f8e7824a4 100644 --- a/Sources/libreprl/libreprl-posix.c +++ b/Sources/libreprl/libreprl-posix.c @@ -23,9 +23,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -36,16 +38,28 @@ #include #include #include +#ifdef __linux__ +#include +#endif // Well-known file descriptor numbers for reprl <-> child communication, child process side +// Make sure you modify reprl_fds[] below if you change these. #define REPRL_CHILD_CTRL_IN 100 #define REPRL_CHILD_CTRL_OUT 101 #define REPRL_CHILD_DATA_IN 102 #define REPRL_CHILD_DATA_OUT 103 +static const int reprl_fds[] = { + REPRL_CHILD_CTRL_IN, + REPRL_CHILD_CTRL_OUT, + REPRL_CHILD_DATA_IN, + REPRL_CHILD_DATA_OUT +}; + /// Maximum timeout in microseconds. Mostly just limited by the fact that the timeout in milliseconds has to fit into a 32-bit integer. #define REPRL_MAX_TIMEOUT_IN_MICROSECONDS ((uint64_t)(INT_MAX) * 1000) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) static size_t min(size_t x, size_t y) { return x < y ? x : y; } @@ -79,6 +93,103 @@ static void free_string_array(char** arr) free(arr); } +static unsigned int getdtablesize_or_crash() { + const int tablesize = getdtablesize(); + if (tablesize < 0) { + fprintf(stderr, "getdtablesize() failed: %s. This likely means the system is borked.\n", + strerror(errno)); + exit(-1); + } + + return (unsigned int)tablesize; +} + +static int fd_cast(unsigned int fd) { + if (fd > INT_MAX) { + fprintf(stderr, "File descriptor value %u is too large to fit into int. This likely means " + "the system is borked.\n", fd); + exit(-1); + } + + return (int)fd; +} + +static bool system_supports_close_range() { +#ifdef __linux__ + struct utsname buffer; + int major, minor, patch; + (void)uname(&buffer); // Linux uname can only throw EFAULT if buf is invalid. + + if (sscanf(buffer.release, "%d.%d.%d", &major, &minor, &patch) != 3) { + return false; + } + + return major > 5 || (major == 5 && minor >= 9); +#else + // TODO: Technically, FreeBSD does support close_range, but I don't need support for it right + // now, so leaving this unimplemented. + // https://man.freebsd.org/cgi/man.cgi?close_range(2) + return false; +#endif +} + +static int fd_qsort_compare(const void* a, const void* b) { + int fd_a = *(const int*)a; + int fd_b = *(const int*)b; + return (fd_a > fd_b) - (fd_a < fd_b); +} + +/// Fast path which uses close_range() to close ranges of fds. +static void close_all_non_reprl_fds_fast() { + // Unfortunately we cannot trust the reprl_fds array to be sorted since an accidental edit to + // reprl_fds could have broken that assumption. So let's create a new sorted array. It's cheap + // anyways. + int sorted_reprl_fds[ARRAY_SIZE(reprl_fds) + 2]; + sorted_reprl_fds[0] = 3; // Skip the well-known stdin, stdout, stderr fds. + memcpy(sorted_reprl_fds + 1, reprl_fds, sizeof(reprl_fds)); + sorted_reprl_fds[ARRAY_SIZE(sorted_reprl_fds) - 1] = fd_cast(getdtablesize_or_crash()); + qsort(sorted_reprl_fds, ARRAY_SIZE(sorted_reprl_fds), sizeof(int), fd_qsort_compare); + + // Cool, now we will iterate the sorted fds in ranges and close everything in between. + int start_fd = 3, end_fd; + for (size_t i = 0; i < ARRAY_SIZE(sorted_reprl_fds); i++) { + end_fd = sorted_reprl_fds[i]; + if (start_fd < end_fd) { + // Close the range [start_fd, end_fd) + close_range(start_fd, end_fd - 1, 0); + } + start_fd = end_fd + 1; + } +} + +/// Fallback path which makes a close() syscall for each non-REPRL fd. +static void close_all_non_reprl_fds_slow() { + const int tablesize = fd_cast(getdtablesize_or_crash()); + + for (int i = 3; i < tablesize; i++) { + bool is_reprl_fd = false; + for (size_t j = 0; j < ARRAY_SIZE(reprl_fds); j++) { + if (i == reprl_fds[j]) { + is_reprl_fd = true; + break; + } + } + + if (!is_reprl_fd) { + close(i); + } + } +} + +/// Close all file descriptors except the well-known REPRL and stdio fds. +static void close_all_non_reprl_fds() { + if (system_supports_close_range()) { + close_all_non_reprl_fds_fast(); + } else { + close_all_non_reprl_fds_slow(); + } +} + // A unidirectional communication channel for larger amounts of data, up to a maximum size (REPRL_MAX_DATA_SIZE). // Implemented as a (RAM-backed) file for which the file descriptor is shared with the child process and which is mapped into our address space. struct data_channel { @@ -250,13 +361,7 @@ static int reprl_spawn_child(struct reprl_context* ctx) close(devnull); // close all other FDs. We try to use FD_CLOEXEC everywhere, but let's be extra sure we don't leak any fds to the child. - int tablesize = getdtablesize(); - for (int i = 3; i < tablesize; i++) { - if (i == REPRL_CHILD_CTRL_IN || i == REPRL_CHILD_CTRL_OUT || i == REPRL_CHILD_DATA_IN || i == REPRL_CHILD_DATA_OUT) { - continue; - } - close(i); - } + close_all_non_reprl_fds(); execve(ctx->argv[0], ctx->argv, ctx->envp); From ce758a1ad5ae48949b397a08f80c2f5bba9fe58f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Thu, 13 Nov 2025 11:30:29 -0800 Subject: [PATCH 2/2] cleanup: Improve portability Co-authored-by: taylor.fish --- Sources/libreprl/libreprl-posix.c | 57 ++++++++++++++++++------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/Sources/libreprl/libreprl-posix.c b/Sources/libreprl/libreprl-posix.c index f8e7824a4..f523e76eb 100644 --- a/Sources/libreprl/libreprl-posix.c +++ b/Sources/libreprl/libreprl-posix.c @@ -35,11 +35,21 @@ #include #include #include +#include #include #include #include + #ifdef __linux__ -#include +# ifdef __has_include +# if __has_include() +# define HAS_CLOSE_RANGE_HEADERS +# endif +# elif defined(__FreeBSD__) +# ifdef CLOSE_RANGE_CLOEXEC +# define HAS_CLOSE_RANGE_HEADERS +# endif +# endif #endif // Well-known file descriptor numbers for reprl <-> child communication, child process side @@ -60,6 +70,7 @@ static const int reprl_fds[] = { #define REPRL_MAX_TIMEOUT_IN_MICROSECONDS ((uint64_t)(INT_MAX) * 1000) #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + static size_t min(size_t x, size_t y) { return x < y ? x : y; } @@ -93,29 +104,20 @@ static void free_string_array(char** arr) free(arr); } -static unsigned int getdtablesize_or_crash() { +static int getdtablesize_or_crash() { const int tablesize = getdtablesize(); if (tablesize < 0) { fprintf(stderr, "getdtablesize() failed: %s. This likely means the system is borked.\n", strerror(errno)); - exit(-1); - } - - return (unsigned int)tablesize; -} - -static int fd_cast(unsigned int fd) { - if (fd > INT_MAX) { - fprintf(stderr, "File descriptor value %u is too large to fit into int. This likely means " - "the system is borked.\n", fd); - exit(-1); + abort(); } - return (int)fd; + return tablesize; } static bool system_supports_close_range() { -#ifdef __linux__ +#ifdef HAS_CLOSE_RANGE_HEADERS +# ifdef __linux__ struct utsname buffer; int major, minor, patch; (void)uname(&buffer); // Linux uname can only throw EFAULT if buf is invalid. @@ -125,11 +127,16 @@ static bool system_supports_close_range() { } return major > 5 || (major == 5 && minor >= 9); -#else +# elif defined(__FreeBSD__) // TODO: Technically, FreeBSD does support close_range, but I don't need support for it right - // now, so leaving this unimplemented. + // now, so leaving this unimplemented. Don't have a platform to test on. // https://man.freebsd.org/cgi/man.cgi?close_range(2) return false; +# else + return false; +# endif +#else + return false; #endif } @@ -141,13 +148,14 @@ static int fd_qsort_compare(const void* a, const void* b) { /// Fast path which uses close_range() to close ranges of fds. static void close_all_non_reprl_fds_fast() { +#ifdef HAS_CLOSE_RANGE_HEADERS // Unfortunately we cannot trust the reprl_fds array to be sorted since an accidental edit to // reprl_fds could have broken that assumption. So let's create a new sorted array. It's cheap // anyways. int sorted_reprl_fds[ARRAY_SIZE(reprl_fds) + 2]; sorted_reprl_fds[0] = 3; // Skip the well-known stdin, stdout, stderr fds. memcpy(sorted_reprl_fds + 1, reprl_fds, sizeof(reprl_fds)); - sorted_reprl_fds[ARRAY_SIZE(sorted_reprl_fds) - 1] = fd_cast(getdtablesize_or_crash()); + sorted_reprl_fds[ARRAY_SIZE(sorted_reprl_fds) - 1] = getdtablesize_or_crash(); qsort(sorted_reprl_fds, ARRAY_SIZE(sorted_reprl_fds), sizeof(int), fd_qsort_compare); // Cool, now we will iterate the sorted fds in ranges and close everything in between. @@ -160,11 +168,16 @@ static void close_all_non_reprl_fds_fast() { } start_fd = end_fd + 1; } +#else + fprintf(stderr, "close_all_non_reprl_fds_fast() called on a system which does not support " + "close_range(). This is likely a programming bug.\n"); + abort(); +#endif } /// Fallback path which makes a close() syscall for each non-REPRL fd. static void close_all_non_reprl_fds_slow() { - const int tablesize = fd_cast(getdtablesize_or_crash()); + const int tablesize = getdtablesize_or_crash(); for (int i = 3; i < tablesize; i++) { bool is_reprl_fd = false; @@ -183,11 +196,7 @@ static void close_all_non_reprl_fds_slow() { /// Close all file descriptors except the well-known REPRL and stdio fds. static void close_all_non_reprl_fds() { - if (system_supports_close_range()) { - close_all_non_reprl_fds_fast(); - } else { - close_all_non_reprl_fds_slow(); - } + system_supports_close_range() ? close_all_non_reprl_fds_fast() : close_all_non_reprl_fds_slow(); } // A unidirectional communication channel for larger amounts of data, up to a maximum size (REPRL_MAX_DATA_SIZE).