@@ -35,22 +35,19 @@ static __thread struct epoll_event epoll_events[MAX_KEVENT];
3535 */
3636static pthread_t monitoring_thread ;
3737static pid_t monitoring_tid ; /* Monitoring thread */
38- static atomic_bool monitoring_thread_created ; /* set after successful pthread_create;
39- distinguishes "never started" from
40- "started, then exited" (which clears
41- monitoring_tid). Cleared in the
42- fork hook so the child can spin up
43- its own monitoring thread. */
44-
4538static pthread_mutex_t monitoring_thread_mtx = PTHREAD_MUTEX_INITIALIZER ;
4639static pthread_cond_t monitoring_thread_cond = PTHREAD_COND_INITIALIZER ;
4740
41+ enum thread_exit_state {
42+ THREAD_EXIT_STATE_SELF_CANCEL = 0 ,
43+ THREAD_EXIT_STATE_CANCEL_LOCKED ,
44+ THREAD_EXIT_STATE_CANCEL_UNLOCKED ,
45+ };
46+
4847/*
49- * No more THREAD_EXIT_STATE_* enum: cleanup always acquires
50- * kq_mtx in the thread context that runs it, with no
51- * "inherited from cancellor" trick. See monitoring_thread_cleanup
52- * and linux_libkqueue_free below for the protocol.
48+ * Monitoring thread is exiting because the process is terminating
5349 */
50+ static enum thread_exit_state monitoring_thread_state ;
5451
5552/*
5653 * Map for kqueue pipes where index is the read side (for which signals are received)
@@ -119,84 +116,86 @@ fd_map_get(int fd)
119116 * fields and on kq_list with M0 in main's lockset but not T1's,
120117 * even though both threads acquire the same mutex.
121118 *
122- * Cleanup always acquires kq_mtx in the thread context that
123- * runs it (no "lock inherited from cancellor" trick). Both
124- * monitoring_thread_loop and linux_libkqueue_free are responsible
125- * for releasing kq_mtx before the cancellation point that triggers
126- * this handler so the lock is genuinely free when we get here.
127- *
128- * TSAN cannot validate the synchronisation pattern: its
129- * instrumentation does not propagate happens-before edges across
130- * pthread_cancel into the cleanup-handler invocation, and warnings
131- * fire on every memory access made by the chain (kqueue_free,
132- * filter_unregister_all, knote_delete_all, ...). TSAN_IGNORE on
133- * this function alone wouldn't help because the attribute doesn't
134- * propagate to callees. The suppressions file at tools/tsan.supp
135- * (`race:monitoring_thread_cleanup`) catches every warning under
136- * this stack pattern, applied in CI via TSAN_OPTIONS=suppressions=.
119+ * The protection is real (lock is acquired before any access)
120+ * but the instrumentation can't see it. The suppressions file
121+ * at tools/tsan.supp (`race:monitoring_thread_cleanup`) catches
122+ * every warning under this stack pattern, applied in CI via
123+ * TSAN_OPTIONS=suppressions=.
137124 */
138125static void
139126monitoring_thread_cleanup (UNUSED void * arg )
140127{
141128 struct kqueue * kq , * kq_tmp ;
142129
143- tracing_mutex_lock (& kq_mtx );
130+ if ((monitoring_thread_state == THREAD_EXIT_STATE_CANCEL_LOCKED ) ||
131+ (monitoring_thread_state == THREAD_EXIT_STATE_CANCEL_UNLOCKED )) {
144132
145- /*
146- * If the entire process is exiting, then free all
147- * the kqueues.
148- *
149- * We do this because we don't reliably receive all the
150- * close MONITORING_THREAD_SIGNALs before the process
151- * exits, and this avoids ASAN or valgrind raising
152- * spurious memory leaks.
153- *
154- * If the user _hasn't_ closed a KQ fd, then we don't
155- * free the underlying memory, and it'll be correctly
156- * reported as a memory leak.
157- *
158- * On the SELF_CANCEL path (kq_cnt hit zero in the loop)
159- * kq_list is already empty so this walk is a no-op.
160- */
161- LIST_FOREACH_SAFE (kq , & kq_list , kq_entry , kq_tmp ) {
162133 /*
163- * We only cleanup kqueues where their file descriptor
164- * has been closed. Other kqueues may be in the middle
165- * of operations with kq->kq_mtx held, and attempting to
166- * clean them up would cause a deadlock. Those are
167- * legitimate leaks the user application should have
168- * freed before exit.
134+ * Keep the assertion in kqueue_free happy
169135 */
170- dbg_printf ("kq=%p - fd=%i explicitly checking for closure" , kq , kq -> kq_id );
171- if (fcntl (kq -> kq_id , F_GETFD ) < 0 ) {
172- dbg_printf ("kq=%p - fd=%i forcefully cleaning up, current use_count=%u: %s" ,
173- kq , kq -> kq_id , fd_use_cnt [kq -> kq_id ],
174- errno == EBADF ? "File descriptor already closed" : strerror (errno ));
175- fd_use_cnt [kq -> kq_id ] = 0 ;
176- } else {
136+ if (monitoring_thread_state == THREAD_EXIT_STATE_CANCEL_UNLOCKED )
137+ tracing_mutex_lock (& kq_mtx );
138+
139+ /*
140+ * If the entire process is exiting, then free all
141+ * the kqueues.
142+ *
143+ * We do this because we don't reliably receive all the
144+ * close MONITORING_THREAD_SIGNALs before the process
145+ * exits, and this avoids ASAN or valgrind raising
146+ * spurious memory leaks.
147+ *
148+ * If the user _hasn't_ closed a KQ fd, then we don't
149+ * free the underlying memory, and it'll be correctly
150+ * reported as a memory leak.
151+ */
152+ LIST_FOREACH_SAFE (kq , & kq_list , kq_entry , kq_tmp ) {
177153 /*
178- * User never closed kqfd. We still treat this as a
179- * leak (sanity check below) but interrupt any threads
180- * parked in epoll_wait first so they exit kevent()
181- * with EBADF instead of staying stuck forever. The
182- * kq itself is still leaked per the existing contract
183- * - the user is responsible for matching kqueue() with
184- * close().
154+ * We only cleanup kqueues where their file descriptor
155+ * has been closed. Other kqueues may be in the middle
156+ * of operations with kq->kq_mtx held, and attempting to
157+ * clean them up would cause a deadlock. Those are
158+ * legitimate leaks the user application should have
159+ * freed before exit.
185160 */
186- assert (fd_use_cnt [kq -> kq_id ] > 0 );
187- linux_kqueue_interrupt (kq );
188- }
161+ dbg_printf ("kq=%p - fd=%i explicitly checking for closure" , kq , kq -> kq_id );
162+ if (fcntl (kq -> kq_id , F_GETFD ) < 0 ) {
163+ dbg_printf ("kq=%p - fd=%i forcefully cleaning up, current use_count=%u: %s" ,
164+ kq , kq -> kq_id , fd_use_cnt [kq -> kq_id ],
165+ errno == EBADF ? "File descriptor already closed" : strerror (errno ));
166+ fd_use_cnt [kq -> kq_id ] = 0 ;
167+ } else {
168+ /*
169+ * User never closed kqfd. We still treat this as a
170+ * leak (sanity check below) but interrupt any threads
171+ * parked in epoll_wait first so they exit kevent()
172+ * with EBADF instead of staying stuck forever. The
173+ * kq itself is still leaked per the existing contract
174+ * - the user is responsible for matching kqueue() with
175+ * close().
176+ */
177+ assert (fd_use_cnt [kq -> kq_id ] > 0 );
178+ linux_kqueue_interrupt (kq );
179+ }
189180
190- if (fd_use_cnt [kq -> kq_id ] == 0 ) {
191- dbg_printf ("kq=%p - fd=%i use_count=%u cleaning up..." , kq , kq -> kq_id , fd_use_cnt [kq -> kq_id ]);
192- kqueue_free (kq );
193- } else {
194- dbg_printf ("kq=%p - fd=%i is alive use_count=%u. Skipping, this is likely a leak..." ,
195- kq , kq -> kq_id , fd_use_cnt [kq -> kq_id ]);
181+ if (fd_use_cnt [kq -> kq_id ] == 0 ) {
182+ dbg_printf ("kq=%p - fd=%i use_count=%u cleaning up..." , kq , kq -> kq_id , fd_use_cnt [kq -> kq_id ]);
183+ kqueue_free (kq );
184+ } else {
185+ dbg_printf ("kq=%p - fd=%i is alive use_count=%u. Skipping, this is likely a leak..." ,
186+ kq , kq -> kq_id , fd_use_cnt [kq -> kq_id ]);
187+ }
196188 }
189+
190+ if (monitoring_thread_state == THREAD_EXIT_STATE_CANCEL_UNLOCKED )
191+ tracing_mutex_unlock (& kq_mtx );
197192 }
198193
199- dbg_printf ("tid=%u - monitoring thread exiting" , monitoring_tid );
194+ dbg_printf ("tid=%u - monitoring thread exiting (%s)" ,
195+ monitoring_tid ,
196+ monitoring_thread_state == THREAD_EXIT_STATE_SELF_CANCEL ?
197+ "no kqueues" : "process term" );
198+
200199 /* Free thread resources */
201200 free (fd_map );
202201 fd_map = NULL ;
@@ -206,7 +205,8 @@ monitoring_thread_cleanup(UNUSED void *arg)
206205 /* Reset so that thread can be restarted */
207206 monitoring_tid = 0 ;
208207
209- tracing_mutex_unlock (& kq_mtx );
208+ if (monitoring_thread_state == THREAD_EXIT_STATE_CANCEL_LOCKED )
209+ tracing_mutex_unlock (& kq_mtx );
210210}
211211
212212
@@ -332,6 +332,7 @@ monitoring_thread_loop(UNUSED void *arg)
332332 pthread_cond_signal (& monitoring_thread_cond );
333333 (void ) pthread_mutex_unlock (& monitoring_thread_mtx );
334334
335+ monitoring_thread_state = THREAD_EXIT_STATE_CANCEL_UNLOCKED ;
335336 pthread_cleanup_push (monitoring_thread_cleanup , NULL )
336337 while (true) {
337338 /*
@@ -368,30 +369,19 @@ monitoring_thread_loop(UNUSED void *arg)
368369 }
369370
370371 /*
371- * Drop kq_mtx before the cancellation point so that, if a
372- * pthread_cancel from main was already pending, the cleanup
373- * handler runs in a clean lockset and reacquires kq_mtx
374- * itself. Without this release, the handler would inherit a
375- * lock owned by this thread but not visible to TSAN's
376- * lockset-based race detection.
372+ * Ensure that any cancellation requests are acted on
377373 */
378- tracing_mutex_unlock ( & kq_mtx ) ;
374+ monitoring_thread_state = THREAD_EXIT_STATE_CANCEL_LOCKED ;
379375 pthread_setcancelstate (PTHREAD_CANCEL_ENABLE , NULL );
380376 pthread_testcancel ();
381377
378+ monitoring_thread_state = THREAD_EXIT_STATE_SELF_CANCEL ;
382379 pthread_cleanup_pop (true); /* Executes the cleanup function (monitoring_thread_cleanup) */
380+ res = pthread_detach (pthread_self ());
381+ if (res != 0 )
382+ dbg_printf ("pthread_detach(3): %s" , strerror (res ));
383+ tracing_mutex_unlock (& kq_mtx );
383384
384- /*
385- * No pthread_detach here. Main thread reads monitoring_tid
386- * under kq_mtx in linux_libkqueue_free; if the thread has
387- * already cleaned up (tid == 0 from cleanup) main skips
388- * pthread_join, otherwise main joins. Detaching from this
389- * side races: the thread can detach between main's tid-read
390- * and main's pthread_join, leaving join to fail with "joining
391- * already joined thread". Without detach, the thread is
392- * always joinable; the leak window is at most up to process
393- * exit, which is when libkqueue_free runs anyway.
394- */
395385 return NULL ;
396386}
397387
@@ -405,7 +395,6 @@ linux_kqueue_start_thread(void)
405395
406396 return (-1 );
407397 }
408- atomic_store (& monitoring_thread_created , true);
409398 /*
410399 * Wait for thread creation to publish monitoring_tid. Loop on
411400 * the predicate (monitoring_tid != 0) to be safe against
@@ -433,14 +422,13 @@ linux_libkqueue_fork(void)
433422 * which called it.
434423 *
435424 * Ensure we don't try and cancel it on exit by
436- * clearing the thread id and the created flag .
425+ * clearing the thread id.
437426 *
438427 * We don't need to lock the kq_mtx here as we're in
439428 * the child, and none of our threads will have been
440429 * duplicated.
441430 */
442431 monitoring_tid = 0 ;
443- atomic_store (& monitoring_thread_created , false);
444432
445433 /*
446434 * If the process has been forked, then we need
@@ -489,34 +477,23 @@ linux_libkqueue_fork(void)
489477static void
490478linux_libkqueue_free (void )
491479{
492- UNUSED pid_t tid ;
480+ pid_t tid ;
493481
494- /*
495- * Use monitoring_thread_created (set after pthread_create) to
496- * decide whether the thread exists at all - monitoring_tid is
497- * cleared by the cleanup handler when the thread exits, so it
498- * can't distinguish "never started" from "started, then
499- * exited". Always pthread_join when created so the thread is
500- * reaped (no thread-leak warning from TSAN). pthread_join on
501- * an exited-but-joinable thread returns immediately with the
502- * exit status; pthread_cancel on the same is a benign ESRCH.
503- *
504- * No kq_mtx around the read - monitoring_thread_created is an
505- * atomic; main has already finished its kq_mtx-protected work
506- * by the time atexit fires.
507- */
508- if (atomic_load (& monitoring_thread_created )) {
482+ tracing_mutex_lock (& kq_mtx );
483+ tid = monitoring_tid ; /* Gets trashed when the thread exits */
484+ if (tid ) {
509485 void * retval ;
510486 int ret ;
511487
512- tracing_mutex_lock (& kq_mtx );
513- tid = monitoring_tid ; /* may be 0 if cleanup already ran */
514- tracing_mutex_unlock (& kq_mtx );
515-
516488 dbg_printf ("tid=%u - cancelling" , tid );
517489 ret = pthread_cancel (monitoring_thread );
518490 if (ret != 0 )
519491 dbg_printf ("tid=%u - cancel failed: %s" , tid , strerror (ret ));
492+ /*
493+ * We unlock here to allow the monitoring thread
494+ * to continue if it was processing a cleanup.
495+ */
496+ tracing_mutex_unlock (& kq_mtx );
520497
521498 ret = pthread_join (monitoring_thread , & retval );
522499 if (ret == 0 ) {
@@ -528,7 +505,8 @@ linux_libkqueue_free(void)
528505 } else {
529506 dbg_printf ("tid=%u - join failed: %s" , tid , strerror (ret ));
530507 }
531- atomic_store (& monitoring_thread_created , false);
508+ } else {
509+ tracing_mutex_unlock (& kq_mtx );
532510 }
533511}
534512
0 commit comments