Skip to content

Commit

Permalink
pythongh-112175: Add eval_breaker to PyThreadState (python#115194)
Browse files Browse the repository at this point in the history
This change adds an `eval_breaker` field to `PyThreadState`. The primary
motivation is for performance in free-threaded builds: with thread-local eval
breakers, we can stop a specific thread (e.g., for an async exception) without
interrupting other threads.

The source of truth for the global instrumentation version is stored in the
`instrumentation_version` field in PyInterpreterState. Threads usually read the
version from their local `eval_breaker`, where it continues to be colocated
with the eval breaker bits.
  • Loading branch information
swtaarrs authored and diegorusso committed Apr 17, 2024
1 parent 39bb8a5 commit 6f23410
Show file tree
Hide file tree
Showing 19 changed files with 262 additions and 169 deletions.
5 changes: 5 additions & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ struct _ts {
PyThreadState *next;
PyInterpreterState *interp;

/* The global instrumentation version in high bits, plus flags indicating
when to break out of the interpreter loop in lower bits. See details in
pycore_ceval.h. */
uintptr_t eval_breaker;

struct {
/* Has been initialized to a safe state.
Expand Down
50 changes: 26 additions & 24 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ PyAPI_FUNC(int) _PyEval_MakePendingCalls(PyThreadState *);

extern void _Py_FinishPendingCalls(PyThreadState *tstate);
extern void _PyEval_InitState(PyInterpreterState *);
extern void _PyEval_SignalReceived(PyInterpreterState *interp);
extern void _PyEval_SignalReceived(void);

// bitwise flags:
#define _Py_PENDING_MAINTHREADONLY 1
Expand All @@ -55,7 +55,6 @@ PyAPI_FUNC(int) _PyEval_AddPendingCall(
void *arg,
int flags);

extern void _PyEval_SignalAsyncExc(PyInterpreterState *interp);
#ifdef HAVE_FORK
extern PyStatus _PyEval_ReInitThreads(PyThreadState *tstate);
#endif
Expand Down Expand Up @@ -200,40 +199,43 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);


#define _PY_GIL_DROP_REQUEST_BIT 0
#define _PY_SIGNALS_PENDING_BIT 1
#define _PY_CALLS_TO_DO_BIT 2
#define _PY_ASYNC_EXCEPTION_BIT 3
#define _PY_GC_SCHEDULED_BIT 4
#define _PY_EVAL_PLEASE_STOP_BIT 5
#define _PY_EVAL_EXPLICIT_MERGE_BIT 6
/* Bits that can be set in PyThreadState.eval_breaker */
#define _PY_GIL_DROP_REQUEST_BIT (1U << 0)
#define _PY_SIGNALS_PENDING_BIT (1U << 1)
#define _PY_CALLS_TO_DO_BIT (1U << 2)
#define _PY_ASYNC_EXCEPTION_BIT (1U << 3)
#define _PY_GC_SCHEDULED_BIT (1U << 4)
#define _PY_EVAL_PLEASE_STOP_BIT (1U << 5)
#define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6)

/* Reserve a few bits for future use */
#define _PY_EVAL_EVENTS_BITS 8
#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1)

static inline void
_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set)
_Py_set_eval_breaker_bit(PyThreadState *tstate, uintptr_t bit)
{
assert(set == 0 || set == 1);
uintptr_t to_set = set << bit;
uintptr_t mask = ((uintptr_t)1) << bit;
uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker);
if ((old & mask) == to_set) {
return;
}
uintptr_t new;
do {
new = (old & ~mask) | to_set;
} while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new));
_Py_atomic_or_uintptr(&tstate->eval_breaker, bit);
}

static inline void
_Py_unset_eval_breaker_bit(PyThreadState *tstate, uintptr_t bit)
{
_Py_atomic_and_uintptr(&tstate->eval_breaker, ~bit);
}

static inline bool
_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit)
static inline int
_Py_eval_breaker_bit_is_set(PyThreadState *tstate, uintptr_t bit)
{
return _Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit);
uintptr_t b = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
return (b & bit) != 0;
}

// Free-threaded builds use these functions to set or unset a bit on all
// threads in the given interpreter.
void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit);
void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit);


#ifdef __cplusplus
}
Expand Down
11 changes: 4 additions & 7 deletions Include/internal/pycore_ceval_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,10 @@ struct _ceval_runtime_state {


struct _ceval_state {
/* This single variable consolidates all requests to break out of
* the fast path in the eval loop.
* It is by far the hottest field in this struct and
* should be placed at the beginning. */
uintptr_t eval_breaker;
/* Avoid false sharing */
int64_t padding[7];
/* This variable holds the global instrumentation version. When a thread is
running, this value is overlaid onto PyThreadState.eval_breaker so that
changes in the instrumentation version will trigger the eval breaker. */
uintptr_t instrumentation_version;
int recursion_limit;
struct _gil_runtime_state *gil;
int own_gil;
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);

// Functions to clear types free lists
extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
extern void _Py_ScheduleGC(PyInterpreterState *interp);
extern void _Py_ScheduleGC(PyThreadState *tstate);
extern void _Py_RunGC(PyThreadState *tstate);

#ifdef __cplusplus
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ typedef struct pyruntimestate {
int64_t next_id;
} interpreters;

/* Platform-specific identifier and PyThreadState, respectively, for the
main thread in the main interpreter. */
unsigned long main_thread;
PyThreadState *main_tstate;

/* ---------- IMPORTANT ---------------------------
The fields above this line are declared as early as
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Every ``PyThreadState`` now has its own ``eval_breaker``, allowing specific threads to be interrupted.
11 changes: 4 additions & 7 deletions Modules/signalmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,11 +276,7 @@ trip_signal(int sig_num)
cleared in PyErr_CheckSignals() before .tripped. */
_Py_atomic_store_int(&is_tripped, 1);

/* Signals are always handled by the main interpreter */
PyInterpreterState *interp = _PyInterpreterState_Main();

/* Notify ceval.c */
_PyEval_SignalReceived(interp);
_PyEval_SignalReceived();

/* And then write to the wakeup fd *after* setting all the globals and
doing the _PyEval_SignalReceived. We used to write to the wakeup fd
Expand All @@ -303,6 +299,7 @@ trip_signal(int sig_num)

int fd = wakeup.fd;
if (fd != INVALID_FD) {
PyInterpreterState *interp = _PyInterpreterState_Main();
unsigned char byte = (unsigned char)sig_num;
#ifdef MS_WINDOWS
if (wakeup.use_send) {
Expand Down Expand Up @@ -1770,8 +1767,8 @@ PyErr_CheckSignals(void)
Python code to ensure signals are handled. Checking for the GC here
allows long running native code to clean cycles created using the C-API
even if it doesn't run the evaluation loop */
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
_Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
if (_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT)) {
_Py_unset_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
_Py_RunGC(tstate);
}

Expand Down
2 changes: 1 addition & 1 deletion Python/brc.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ _Py_brc_queue_object(PyObject *ob)
}

// Notify owning thread
_Py_set_eval_breaker_bit(interp, _PY_EVAL_EXPLICIT_MERGE_BIT, 1);
_Py_set_eval_breaker_bit(&tstate->base, _PY_EVAL_EXPLICIT_MERGE_BIT);

PyMutex_Unlock(&bucket->mutex);
}
Expand Down
7 changes: 3 additions & 4 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
#include "pycore_code.h"
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
#include "pycore_function.h"
Expand Down Expand Up @@ -146,7 +145,7 @@ dummy_func(
TIER_ONE_ONLY
assert(frame == tstate->current_frame);
uintptr_t global_version =
_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) &
~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((code_version & 255) == 0);
Expand All @@ -168,14 +167,14 @@ dummy_func(
DEOPT_IF(_Py_emscripten_signal_clock == 0);
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
DEOPT_IF(eval_breaker != version);
}

inst(INSTRUMENTED_RESUME, (--)) {
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
if (code_version != global_version) {
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
Expand Down
2 changes: 1 addition & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
#include "pycore_ceval.h"
#include "pycore_code.h"
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
#include "pycore_function.h"
Expand Down

0 comments on commit 6f23410

Please sign in to comment.