Skip to content

Commit

Permalink
1.0.26.17: fix GC/SIG_STOP_FOR_GC race
Browse files Browse the repository at this point in the history
Consider this: in a PA section GC is requested: GC_PENDING,
pseudo_atomic_interrupted and gc_blocked_deferrables are set,
deferrables are blocked then pseudo_atomic_atomic is cleared, but a
SIG_STOP_FOR_GC arrives before trapping to interrupt_handle_pending.
In sig_stop_for_gc_handler, GC_PENDING is cleared but
pseudo_atomic_interrupted is not and we go on running with
pseudo_atomic_interrupted but without a pending interrupt or GC.
GC_BLOCKED_DEFERRABLES is also left at 1.

Add more checks, fix comments.
  • Loading branch information
Gabor Melis committed Mar 22, 2009
1 parent 7a0ba26 commit ce73c96
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 17 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ changes in sbcl-1.0.27 relative to 1.0.26:
* new port: support added for x86-64 OpenBSD. (thanks to Josh Elsasser)
* bug fix: a type error is signaled for attempts to use the LOOP
keyword ACROSS for a NIL value. (thanks to Daniel Lowe)
* bug fix: fix gc related interrupt handling bug on ppc (regression from
1.0.25.37, reported by Harald Hanche-Olsen)
* bug fix: work around signal delivery bug in darwin (regression from
1.0.25.44, reported by Sidney Markowitz)

changes in sbcl-1.0.26 relative to 1.0.25:
* incompatible change: an interruption (be it a function passed to
Expand Down
7 changes: 3 additions & 4 deletions src/runtime/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,9 @@ pa_alloc(int bytes, int page_type_flag)
lispobj *result;
struct thread *th = arch_os_get_current_thread();

/* SIG_STOP_FOR_GC needs to be enabled before we can call lisp:
* otherwise two threads racing here may deadlock: the other will
* wait on the GC lock, and the other cannot stop the first
* one... */
/* SIG_STOP_FOR_GC must be unblocked: else two threads racing here
* may deadlock: one will wait on the GC lock, and the other
* cannot stop the first one... */
check_gc_signals_unblocked_or_lose(0);

/* FIXME: OOAO violation: see arch_pseudo_* */
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/backtrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ describe_thread_state(void)
{
sigset_t mask;
struct thread *thread = arch_os_get_current_thread();
struct interrupt_data *data = thread->interrupt_data;
#ifndef LISP_FEATURE_WIN32
get_current_sigmask(&mask);
printf("Signal mask:\n");
Expand All @@ -553,6 +554,7 @@ describe_thread_state(void)
#ifdef STOP_FOR_GC_PENDING
printf(" *STOP-FOR-GC-PENDING* = %s\n", (SymbolValue(STOP_FOR_GC_PENDING, thread) == T) ? "T" : "NIL");
#endif
printf("Pending handler = %p\n", data->pending_handler);
}

/* This function has been split from backtrace() to enable Lisp
Expand Down
5 changes: 5 additions & 0 deletions src/runtime/dynbind.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
* files for more information.
*/

#include <stdio.h>
#include <stdlib.h>

#include "sbcl.h"
#include "runtime.h"
#include "globals.h"
Expand Down Expand Up @@ -44,6 +47,7 @@ void bind_variable(lispobj symbol, lispobj value, void *th)
if(!sym->tls_index) {
lispobj *tls_index_lock=
&((struct symbol *)native_pointer(TLS_INDEX_LOCK))->value;
FSHOW_SIGNAL((stderr, "entering dynbind tls alloc\n"));
set_pseudo_atomic_atomic(th);
get_spinlock(tls_index_lock,(long)th);
if(!sym->tls_index) {
Expand All @@ -55,6 +59,7 @@ void bind_variable(lispobj symbol, lispobj value, void *th)
}
}
release_spinlock(tls_index_lock);
FSHOW_SIGNAL((stderr, "exiting dynbind tls alloc\n"));
clear_pseudo_atomic_atomic(th);
if (get_pseudo_atomic_interrupted(th))
do_pending_interrupt();
Expand Down
46 changes: 37 additions & 9 deletions src/runtime/interrupt.c
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,14 @@ check_interrupt_context_or_lose(os_context_t *context)
if (stop_for_gc_pending)
if (!(pseudo_atomic_interrupted || gc_inhibit || in_race_p))
lose("STOP_FOR_GC_PENDING, but why?\n");
if (pseudo_atomic_interrupted)
if (!(gc_pending || stop_for_gc_pending || interrupt_deferred_p))
lose("pseudo_atomic_interrupted, but why?\n");
}
#else
if (pseudo_atomic_interrupted)
if (!(gc_pending || interrupt_deferred_p))
lose("pseudo_atomic_interrupted, but why?\n");
#endif
#endif
if (interrupt_pending && !interrupt_deferred_p)
Expand Down Expand Up @@ -756,7 +763,7 @@ interrupt_handle_pending(os_context_t *context)
struct interrupt_data *data = thread->interrupt_data;

if (arch_pseudo_atomic_atomic(context)) {
lose("Handling pending interrupt in pseduo atomic.");
lose("Handling pending interrupt in pseudo atomic.");
}

FSHOW_SIGNAL((stderr, "/entering interrupt_handle_pending\n"));
Expand Down Expand Up @@ -876,11 +883,15 @@ interrupt_handle_pending(os_context_t *context)
sigcopyset(os_context_sigmask_addr(context), &data->pending_mask);
run_deferred_handler(data, context);
}
#endif
#ifdef LISP_FEATURE_GENCGC
if (get_pseudo_atomic_interrupted(thread))
lose("pseudo_atomic_interrupted after interrupt_handle_pending\n");
#endif
/* It is possible that the end of this function was reached
* without never actually doing anything, the tests in Lisp for
* when to call receive-pending-interrupt are not exact. */
FSHOW_SIGNAL((stderr, "/exiting interrupt_handle_pending\n"));
#endif
}


Expand Down Expand Up @@ -1023,24 +1034,24 @@ maybe_defer_handler(void *handler, struct interrupt_data *data,
*/
if ((SymbolValue(INTERRUPTS_ENABLED,thread) == NIL) ||
in_leaving_without_gcing_race_p(thread)) {
store_signal_data_for_later(data,handler,signal,info,context);
SetSymbolValue(INTERRUPT_PENDING, T,thread);
FSHOW_SIGNAL((stderr,
"/maybe_defer_handler(%x,%d): deferred (RACE=%d)\n",
(unsigned int)handler,signal,
in_leaving_without_gcing_race_p(thread)));
store_signal_data_for_later(data,handler,signal,info,context);
SetSymbolValue(INTERRUPT_PENDING, T,thread);
check_interrupt_context_or_lose(context);
return 1;
}
/* a slightly confusing test. arch_pseudo_atomic_atomic() doesn't
* actually use its argument for anything on x86, so this branch
* may succeed even when context is null (gencgc alloc()) */
if (arch_pseudo_atomic_atomic(context)) {
store_signal_data_for_later(data,handler,signal,info,context);
arch_set_pseudo_atomic_interrupted(context);
FSHOW_SIGNAL((stderr,
"/maybe_defer_handler(%x,%d): deferred(PA)\n",
(unsigned int)handler,signal));
store_signal_data_for_later(data,handler,signal,info,context);
arch_set_pseudo_atomic_interrupted(context);
check_interrupt_context_or_lose(context);
return 1;
}
Expand Down Expand Up @@ -1128,15 +1139,15 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, os_context_t *context)
/* Test for GC_INHIBIT _first_, else we'd trap on every single
* pseudo atomic until gc is finally allowed. */
if (SymbolValue(GC_INHIBIT,thread) != NIL) {
SetSymbolValue(STOP_FOR_GC_PENDING,T,thread);
FSHOW_SIGNAL((stderr, "sig_stop_for_gc deferred (*GC-INHIBIT*)\n"));
SetSymbolValue(STOP_FOR_GC_PENDING,T,thread);
return;
} else if (arch_pseudo_atomic_atomic(context)) {
FSHOW_SIGNAL((stderr,"sig_stop_for_gc deferred (PA)\n"));
SetSymbolValue(STOP_FOR_GC_PENDING,T,thread);
arch_set_pseudo_atomic_interrupted(context);
maybe_save_gc_mask_and_block_deferrables
(os_context_sigmask_addr(context));
FSHOW_SIGNAL((stderr,"sig_stop_for_gc deferred (PA)\n"));
return;
}

Expand All @@ -1151,6 +1162,23 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, os_context_t *context)
SetSymbolValue(GC_PENDING,NIL,thread);
SetSymbolValue(STOP_FOR_GC_PENDING,NIL,thread);

/* Consider this: in a PA section GC is requested: GC_PENDING,
* pseudo_atomic_interrupted and gc_blocked_deferrables are set,
* deferrables are blocked then pseudo_atomic_atomic is cleared,
* but a SIG_STOP_FOR_GC arrives before trapping to
* interrupt_handle_pending. Here, GC_PENDING is cleared but
* pseudo_atomic_interrupted is not and we go on running with
* pseudo_atomic_interrupted but without a pending interrupt or
* GC. GC_BLOCKED_DEFERRABLES is also left at 1. So let's tidy it
* up. */
if (thread->interrupt_data->gc_blocked_deferrables) {
FSHOW_SIGNAL((stderr,"cleaning up after gc_blocked_deferrables\n"));
clear_pseudo_atomic_interrupted(thread);
sigcopyset(os_context_sigmask_addr(context),
&thread->interrupt_data->pending_mask);
thread->interrupt_data->gc_blocked_deferrables = 0;
}

if(thread_state(thread)!=STATE_RUNNING) {
lose("sig_stop_for_gc_handler: wrong thread state: %ld\n",
fixnum_value(thread->state));
Expand Down Expand Up @@ -1476,7 +1504,7 @@ handle_guard_page_triggered(os_context_t *context,os_vm_address_t addr)
else if (addr >= undefined_alien_address &&
addr < undefined_alien_address + os_vm_page_size) {
arrange_return_to_lisp_function
(context, StaticSymbolFunction(UNDEFINED_ALIEN_VARIABLE_ERROR));
(context, StaticSymbolFunction(UNDEFINED_ALIEN_VARIABLE_ERROR));
return 1;
}
else return 0;
Expand Down
6 changes: 3 additions & 3 deletions src/runtime/interrupt.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ struct interrupt_data {
siginfo_t pending_info;
sigset_t pending_mask;
/* Was pending mask saved for gc request? True if GC_PENDING or
* SIG_STOP_FOR_GC happened in a pseudo atomic with no GC_INHIBIT
* NIL. Both deferrable interrupt handlers and gc are careful not
* to clobber each other's pending_mask. */
* SIG_STOP_FOR_GC happened in a pseudo atomic with GC_INHIBIT NIL
* and with no pending handler. Both deferrable interrupt handlers
* and gc are careful not to clobber each other's pending_mask. */
boolean gc_blocked_deferrables;
#ifdef LISP_FEATURE_PPC
/* On PPC when consing wants to turn to alloc(), it does so via a
Expand Down
2 changes: 1 addition & 1 deletion version.lisp-expr
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
"1.0.26.16"
"1.0.26.17"

0 comments on commit ce73c96

Please sign in to comment.