Skip to content

Commit

Permalink
pythongh-112529: Make the GC scheduling thread-safe
Browse files Browse the repository at this point in the history
The GC keeps track of the number of allocations (less deallocations)
since the last GC. This buffers the count in thread-local state and uses
atomic operations to modify the per-interpreter count. The thread-local
buffering avoids contention on shared state.

A consequence is that the GC scheduling is not as precise, so
"test_sneaky_frame_object" is skipped because it requires that the GC be
run exactly after allocating a frame object.
  • Loading branch information
colesbury committed Feb 1, 2024
1 parent 587d480 commit 5f53ebd
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 16 deletions.
7 changes: 7 additions & 0 deletions Include/internal/pycore_gc.h
Expand Up @@ -243,6 +243,13 @@ struct _gc_runtime_state {
Py_ssize_t long_lived_pending;
};

#ifdef Py_GIL_DISABLED
struct _gc_thread_state {
/* Thread-local allocation count. */
Py_ssize_t alloc_count;
};
#endif


extern void _PyGC_InitState(struct _gc_runtime_state *);

Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_tstate.h
Expand Up @@ -20,6 +20,7 @@ typedef struct _PyThreadStateImpl {
PyThreadState base;

#ifdef Py_GIL_DISABLED
struct _gc_thread_state gc;
struct _mimalloc_thread_state mimalloc;
struct _Py_freelist_state freelist_state;
#endif
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_frame.py
Expand Up @@ -13,7 +13,7 @@
_testcapi = None

from test import support
from test.support import threading_helper
from test.support import threading_helper, Py_GIL_DISABLED
from test.support.script_helper import assert_python_ok


Expand Down Expand Up @@ -295,6 +295,7 @@ def gen():
assert_python_ok("-c", code)

@support.cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "test requires precise GC scheduling")
def test_sneaky_frame_object(self):

def trace(frame, event, arg):
Expand Down
10 changes: 10 additions & 0 deletions Modules/gcmodule.c
Expand Up @@ -201,6 +201,16 @@ gc_get_count_impl(PyObject *module)
/*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
{
GCState *gcstate = get_gc_state();

#ifdef Py_GIL_DISABLED
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
struct _gc_thread_state *gc = &tstate->gc;

// Flush the local allocation count to the global count
_Py_atomic_add_int(&gcstate->generations[0].count, gc->alloc_count);
gc->alloc_count = 0;
#endif

return Py_BuildValue("(iii)",
gcstate->generations[0].count,
gcstate->generations[1].count,
Expand Down
2 changes: 2 additions & 0 deletions Objects/typeobject.c
Expand Up @@ -1739,6 +1739,8 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems)
if (presize) {
((PyObject **)alloc)[0] = NULL;
((PyObject **)alloc)[1] = NULL;
}
if (PyType_IS_GC(type)) {
_PyObject_GC_Link(obj);
}
memset(obj, '\0', size);
Expand Down
63 changes: 48 additions & 15 deletions Python/gc_free_threading.c
Expand Up @@ -22,6 +22,11 @@ typedef struct _gc_runtime_state GCState;
# define GC_DEBUG
#endif

// Each thread buffers the count of allocated objects in a thread-local
// variable up to +/- this amount to reduce the overhead of updating
// the global count.
#define LOCAL_ALLOC_COUNT_THRESHOLD 512

// Automatically choose the generation that needs collecting.
#define GENERATION_AUTO (-1)

Expand Down Expand Up @@ -923,6 +928,41 @@ gc_should_collect(GCState *gcstate)
gcstate->generations[1].threshold == 0);
}

static void
record_allocation(PyThreadState *tstate)
{
struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;

// We buffer the allocation count to avoid the overhead of atomic
// operations for every allocation.
gc->alloc_count++;
if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) {
// TODO: Use Py_ssize_t for the generation count.
GCState *gcstate = &tstate->interp->gc;
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
gc->alloc_count = 0;

if (gc_should_collect(gcstate) &&
!_Py_atomic_load_int_relaxed(&gcstate->collecting))
{
_Py_ScheduleGC(tstate->interp);
}
}
}

static void
record_deallocation(PyThreadState *tstate)
{
struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;

gc->alloc_count--;
if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) {
GCState *gcstate = &tstate->interp->gc;
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
gc->alloc_count = 0;
}
}

static void
gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
{
Expand All @@ -942,6 +982,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
}
}

// Record the number of live GC objects
interp->gc.long_lived_total = state->long_lived_total;

// Clear weakrefs and enqueue callbacks (but do not call them).
clear_weakrefs(state);
_PyEval_StartTheWorld(interp);
Expand Down Expand Up @@ -1048,7 +1091,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)

m = state.collected;
n = state.uncollectable;
gcstate->long_lived_total = state.long_lived_total;

if (gcstate->debug & _PyGC_DEBUG_STATS) {
double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
Expand Down Expand Up @@ -1488,15 +1530,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
void
_PyObject_GC_Link(PyObject *op)
{
PyThreadState *tstate = _PyThreadState_GET();
GCState *gcstate = &tstate->interp->gc;
gcstate->generations[0].count++;

if (gc_should_collect(gcstate) &&
!_Py_atomic_load_int_relaxed(&gcstate->collecting))
{
_Py_ScheduleGC(tstate->interp);
}
record_allocation(_PyThreadState_GET());
}

void
Expand All @@ -1522,7 +1556,7 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
((PyObject **)mem)[1] = NULL;
}
PyObject *op = (PyObject *)(mem + presize);
_PyObject_GC_Link(op);
record_allocation(tstate);
return op;
}

Expand Down Expand Up @@ -1604,10 +1638,9 @@ PyObject_GC_Del(void *op)
PyErr_SetRaisedException(exc);
#endif
}
GCState *gcstate = get_gc_state();
if (gcstate->generations[0].count > 0) {
gcstate->generations[0].count--;
}

record_deallocation(_PyThreadState_GET());

PyObject_Free(((char *)op)-presize);
}

Expand Down

0 comments on commit 5f53ebd

Please sign in to comment.