-
-
Notifications
You must be signed in to change notification settings - Fork 7.1k
Description
I did this
Called curl_multi_perform() in a loop on a multi handle with no easy handles added, in a process with many threads:
CURLM *multi = curl_multi_init();
while (running) {
curl_multi_wait(multi, extra_fds, 1, 100, NULL);
curl_multi_perform(multi, &running_handles); // multi is empty
}
This was running in Varnish Enterprise's vmod_http module, which uses the curl multi interface. The user who reported this to us was running with 100,000 Varnish (pthread) worker threads.
I expected the following
Minimal CPU usage since there's nothing to do - no transfers, no handles.
What actually happened
High CPU usage that scales linearly with thread count:
- 10,000 threads: ~27% CPU
- 20,000 threads: ~49% CPU
- 100,000 threads: ~220%+ CPU (extrapolated)
The user who reported this to us observed 2500% CPU consumption in the 100,000-thread case.
Profiling shows the time is spent in sigaction().
Root cause analysis
When curl_multi_perform() is called, it processes the connection pool via an internal data handle (multi->cpool.idata). This internal handle does not have CURLOPT_NOSIGNAL set, so sigpipe_apply() performs sigaction() calls to save/restore the SIGPIPE handler.
On Linux, sigaction() is expensive in multi-threaded processes because the kernel must manage signal disposition across all threads. With many threads, this becomes the dominant cost.
When user easy handles are present with CURLOPT_NOSIGNAL=1, no sigaction() is called. But with an empty multi (or only internal handles), this optimization does not apply.
Reproducer
I had an AI helper (Claude) come up with a reproducer that mimics our curl multi engine bits while also creating a configurable number of threads. The reproducer demonstrates that an empty multi incurs a very significant CPU cost, whereas adding just a single handle (one that sets CURLOPT_NOSIGNAL) makes the CPU cost negligible.
/*
* Reproducer for curl_multi_perform + sigaction CPU issue
*
* ROOT CAUSE: When curl_multi_perform() is called on an EMPTY multi
* handle, there's no easy handle to check CURLOPT_NOSIGNAL, so
* libcurl defaults to doing signal handling via sigpipe_apply() ->
* sigaction(). With many threads, sigaction() becomes extremely
* expensive due to kernel signal state management.
*
* Compile: gcc -O2 -o reproducer reproducer.c -lcurl -lpthread
*
* Usage:
* ./reproducer [num_threads] [num_engines] [handles_per_engine]
*
* Examples:
* ./reproducer 10000 10 0 # Empty multi - HIGH CPU (the bug)
* ./reproducer 10000 10 1 # With NOSIGNAL handle - LOW CPU
*
*/
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/resource.h>
#include <curl/curl.h>
/* Number of idle worker threads to create; overridden by argv[1]. */
static int num_threads = 10000;
/* Number of engine threads, each with its own multi handle; overridden by argv[2]. */
static int num_engines = 10;
/* Easy handles added to each multi handle; 0 leaves it empty; overridden by argv[3]. */
static int handles_per_engine = 0;
/* Loop-control flag: worker and engine loops run while this is non-zero. */
static volatile int running = 1;
/* Total engine loop iterations; each engine adds its own count when it exits. */
static volatile long long loop_count = 0;
/*
 * Signal handler that intentionally does nothing.
 *
 * sig: the delivered signal number; ignored.
 */
static void sigusr1_handler(int sig)
{
    (void)sig;
    return;
}
/*
 * Idle thread body: blocks in pause() for as long as `running` is set.
 *
 * The flag is re-checked every time pause() returns, i.e. after a signal
 * handler has run in this thread.
 *
 * arg: unused.
 * Returns NULL.
 */
static void *
worker_thread(void *arg)
{
    (void)arg;

    for (;;) {
        if (!running)
            break;
        pause();
    }

    return (NULL);
}
/*
 * Write callback that discards the data it is given.
 *
 * ptr, userdata: ignored.
 * size, nmemb:   element size and element count of the delivered chunk.
 *
 * Returns size * nmemb, i.e. reports the whole chunk as consumed.
 */
static size_t
null_write_cb(char *ptr, size_t size, size_t nmemb, void *userdata)
{
    size_t consumed = size * nmemb;

    (void)userdata;
    (void)ptr;

    return (consumed);
}
/* This mimics Varnish's engine_thread */
static void *
engine_thread(void *arg)
{
int id = (int)(long)arg;
CURLM *multi;
CURL **handles = NULL;
struct curl_waitfd waitfd[1];
int pipefd[2];
long long local_count = 0;
int i;
(void)id;
if (pipe(pipefd) < 0)
return (NULL);
multi = curl_multi_init();
if (multi == NULL)
return (NULL);
/* Add handles with CURLOPT_NOSIGNAL=1 if requested */
if (handles_per_engine > 0) {
handles = calloc(handles_per_engine, sizeof(CURL *));
for (i = 0; i < handles_per_engine; i++) {
handles[i] = curl_easy_init();
if (handles[i] != NULL) {
curl_easy_setopt(handles[i], CURLOPT_URL,
"http://192.0.2.1:1/");
curl_easy_setopt(handles[i], CURLOPT_NOSIGNAL,
1L); /* KEY! */
curl_easy_setopt(handles[i],
CURLOPT_WRITEFUNCTION, null_write_cb);
curl_easy_setopt(handles[i],
CURLOPT_CONNECTTIMEOUT, 3600L);
curl_multi_add_handle(multi, handles[i]);
}
}
}
waitfd[0].fd = pipefd[0];
waitfd[0].events = CURL_WAIT_POLLIN;
waitfd[0].revents = 0;
while (running) {
int n;
CURLMcode mc;
/* Wait for activity (100ms timeout) */
curl_multi_wait(multi, waitfd, 1, 100, NULL);
/* This is where sigpipe_apply -> sigaction is called
* when multi is empty! */
do {
mc = curl_multi_perform(multi, &n);
} while (mc == CURLM_CALL_MULTI_PERFORM);
local_count++;
waitfd[0].revents = 0;
}
__sync_fetch_and_add(&loop_count, local_count);
if (handles != NULL) {
for (i = 0; i < handles_per_engine; i++) {
if (handles[i] != NULL) {
curl_multi_remove_handle(multi, handles[i]);
curl_easy_cleanup(handles[i]);
}
}
free(handles);
}
curl_multi_cleanup(multi);
close(pipefd[0]);
close(pipefd[1]);
return (NULL);
}
/*
 * Returns the CPU time reported by getrusage(RUSAGE_SELF), user plus
 * system, expressed in microseconds.
 */
static long long
get_cpu_time_us(void)
{
    struct rusage usage;
    long long whole_secs;
    long long frac_usecs;

    getrusage(RUSAGE_SELF, &usage);

    whole_secs = usage.ru_utime.tv_sec + usage.ru_stime.tv_sec;
    frac_usecs = usage.ru_utime.tv_usec + usage.ru_stime.tv_usec;

    return (whole_secs * 1000000LL + frac_usecs);
}
int
main(int argc, char *argv[])
{
pthread_t *workers;
pthread_t *engines;
int i;
struct sigaction sa;
long long cpu_start, cpu_end;
double cpu_secs;
if (argc > 1)
num_threads = atoi(argv[1]);
if (argc > 2)
num_engines = atoi(argv[2]);
if (argc > 3)
handles_per_engine = atoi(argv[3]);
printf("=== curl_multi_perform sigaction CPU Bug Reproducer ===\n\n");
printf("curl version: %s\n", curl_version());
printf("Worker threads: %d\n", num_threads);
printf("Engine threads: %d\n", num_engines);
printf("Handles/engine: %d %s\n\n", handles_per_engine,
handles_per_engine == 0 ?
"(EMPTY MULTI - triggers bug!)" : "(with NOSIGNAL)");
/* Set up signal handlers */
memset(&sa, 0, sizeof(sa));
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART;
sigaction(SIGPIPE, &sa, NULL);
sa.sa_handler = sigusr1_handler;
sa.sa_flags = 0;
sigaction(SIGUSR1, &sa, NULL);
curl_global_init(CURL_GLOBAL_ALL);
/* Create worker threads to make sigaction expensive */
workers = calloc(num_threads, sizeof(pthread_t));
printf("Creating %d worker threads... ", num_threads);
fflush(stdout);
for (i = 0; i < num_threads; i++) {
if (pthread_create(&workers[i], NULL, worker_thread,
NULL) != 0) {
num_threads = i;
break;
}
}
printf("done\n");
/* Create engine threads */
engines = calloc(num_engines, sizeof(pthread_t));
printf("Creating %d engine threads... ", num_engines);
fflush(stdout);
for (i = 0; i < num_engines; i++) {
pthread_create(&engines[i], NULL, engine_thread,
(void *)(long)i);
}
printf("done\n\n");
sleep(1); /* Let engines start */
cpu_start = get_cpu_time_us();
printf("Running for 10 seconds (watch CPU with: top -H -p %d)...\n",
getpid());
sleep(10);
cpu_end = get_cpu_time_us();
running = 0;
for (i = 0; i < num_threads; i++)
pthread_kill(workers[i], SIGUSR1);
for (i = 0; i < num_engines; i++)
pthread_join(engines[i], NULL);
cpu_secs = (cpu_end - cpu_start) / 1e6;
printf("\n=== RESULTS ===\n");
printf("CPU time: %.2f seconds (over 10 wall seconds)\n", cpu_secs);
printf("CPU usage: %.1f%%\n", cpu_secs * 10.0);
printf("Loops/sec/engine: %.0f (expected ~10 with 100ms wait)\n",
(double)loop_count / 10 / num_engines);
if (handles_per_engine == 0 && cpu_secs > 1.0) {
printf("\n*** BUG CONFIRMED ***\n");
printf("High CPU with empty multi handle due to"
" sigaction() calls.\n");
printf("This scales linearly with thread count.\n");
printf("With 100K threads, expect ~%.0f%% CPU\n",
cpu_secs * 10.0 * 5);
}
curl_global_cleanup();
free(workers);
free(engines);
return (0);
}Workaround
On our end we've done a workaround which is to skip calling curl_multi_perform() when there are no pending requests.
curl/libcurl version
curl 8.15.0 (x86_64-redhat-linux-gnu) libcurl/8.15.0 OpenSSL/3.5.4 zlib/1.3.1.zlib-ng brotli/1.2.0 libidn2/2.3.8 libpsl/0.21.5 libssh/0.11.3/openssl/zlib nghttp2/1.66.0 OpenLDAP/2.6.10
Release-Date: 2025-07-16
Protocols: dict file ftp ftps gopher gophers http https imap imaps ipfs ipns ldap ldaps mqtt pop3 pop3s rtsp scp sftp smb smbs smtp smtps telnet tftp ws wss
Features: alt-svc AsynchDNS brotli GSS-API HSTS HTTP2 HTTPS-proxy IDN IPv6 Kerberos Largefile libz NTLM PSL SPNEGO SSL threadsafe TLS-SRP UnixSockets
operating system
Fedora Linux 43 (Forty Three)