From 7060ee8e7d1f9b4e93109fc056da3b32d54fdab7 Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Sun, 22 Mar 2026 11:40:00 -0700 Subject: [PATCH 1/3] Fix flaky tls_conn_timeout autest: handle SIGPIPE and EINTR The ssl-delay-server test helper could die unexpectedly when a client disconnects during the handshake delay. SIGPIPE from the broken connection kills the process, or accept() returns EINTR under heavy parallel load. Add SIGPIPE ignore and EINTR retry to keep the server alive for the StillRunningAfter check. --- tests/gold_tests/timeout/ssl-delay-server.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/gold_tests/timeout/ssl-delay-server.cc b/tests/gold_tests/timeout/ssl-delay-server.cc index d993696229e..93c867f7b5b 100644 --- a/tests/gold_tests/timeout/ssl-delay-server.cc +++ b/tests/gold_tests/timeout/ssl-delay-server.cc @@ -39,6 +39,7 @@ #include #include #include +#include char req_buf[10000]; char post_buf[1000]; @@ -156,6 +157,10 @@ main(int argc, char *argv[]) ttfb_delay = atoi(argv[3]); const char *pem_file = argv[4]; + // Ignore SIGPIPE which can be raised when a client disconnects during + // the handshake delay, killing the process unexpectedly. + signal(SIGPIPE, SIG_IGN); + fprintf(stderr, "Listen on %d connect delay=%d ttfb delay=%d\n", listen_port, connect_delay, ttfb_delay); int listenfd = socket(AF_INET, SOCK_STREAM, 0); @@ -199,8 +204,10 @@ main(int argc, char *argv[]) for (;;) { sfd = accept(listenfd, (struct sockaddr *)nullptr, nullptr); if (sfd <= 0) { - // Failure - printf("Listen failure\n"); + if (errno == EINTR) { + continue; + } + printf("Listen failure errno=%d\n", errno); exit(1); } From 3db0ce77ef6705fc12c2a5f68f63553708c92495 Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Sun, 22 Mar 2026 11:37:33 -0700 Subject: [PATCH 2/3] Fix flaky sigusr2 autest: remove deadlocking Ready condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test 1's Default process had Ready = When.FileExists(diags.log), but by the time Default starts, rotate_diags_log has already moved diags.log to diags.log_old. This creates a deadlock: Default waits for diags.log to exist, but only SIGUSR2 (sent by Default) would cause TS to recreate it. The StartBefore chain already guarantees correct ordering (ts → rotate → Default), so the Ready condition is unnecessary and harmful. --- tests/gold_tests/logging/sigusr2.test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/gold_tests/logging/sigusr2.test.py b/tests/gold_tests/logging/sigusr2.test.py index 397119ab2b5..b339bbbc260 100644 --- a/tests/gold_tests/logging/sigusr2.test.py +++ b/tests/gold_tests/logging/sigusr2.test.py @@ -130,9 +130,9 @@ def get_sigusr2_signal_command(self): # Configure the signaling of SIGUSR2 to traffic_server. tr1.Processes.Default.Command = diags_test.get_sigusr2_signal_command() tr1.Processes.Default.Return = 0 -tr1.Processes.Default.Ready = When.FileExists(diags_test.diags_log) - -# Configure process order. +# Configure process order: ts starts first, then rotate moves diags.log, +# then Default sends SIGUSR2. No Ready condition needed on Default since the +# StartBefore chain already ensures ts is fully started before rotate runs. tr1.Processes.Default.StartBefore(rotate_diags_log) rotate_diags_log.StartBefore(diags_test.ts) tr1.StillRunningAfter = diags_test.ts From f805ae5506a19b37f8756a5924d92e4b3e782e23 Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Sun, 22 Mar 2026 20:57:56 -0700 Subject: [PATCH 3/3] Fix flaky thread_config: add cmdline matching for ASAN Under ASAN, the ATS process CWD may differ from the expected ts_path. Fall back to matching ts_path in the process command line arguments so the test can find the correct traffic_server process. --- tests/gold_tests/thread_config/check_threads.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/gold_tests/thread_config/check_threads.py b/tests/gold_tests/thread_config/check_threads.py index 1a7de52cb92..e0347b7dddc 100755 --- a/tests/gold_tests/thread_config/check_threads.py +++ b/tests/gold_tests/thread_config/check_threads.py @@ -36,13 +36,16 @@ def _count_threads_once(ts_path, etnet_threads, accept_threads, task_threads, ai # Find the pid corresponding to the ats process we started in autest. # It needs to match the process name and the binary path. # If autest can expose the pid of the process this is not needed anymore. + # Match by CWD or command line containing ts_path, since under + # ASAN the CWD may differ from the expected path. process_name = p.name() process_cwd = p.cwd() process_exe = p.exe() - if process_cwd != ts_path: - continue - if process_name != '[TS_MAIN]' and process_name != 'traffic_server' and os.path.basename( - process_exe) != 'traffic_server': + is_ts = process_name == '[TS_MAIN]' or process_name == 'traffic_server' or os.path.basename( + process_exe) == 'traffic_server' + match_by_cwd = process_cwd == ts_path + match_by_cmdline = any(ts_path in arg for arg in (p.cmdline() or [])) + if not is_ts or not (match_by_cwd or match_by_cmdline): continue except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): continue