diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d2e29a1b95ede2f..e9a76e2f3f2ca34 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1315,7 +1315,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, - [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, + [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG }, [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6713e9bc95f942d..e572110dbdd28fc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -239,7 +239,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, [_LOCK_OBJECT] = HAS_DEOPT_FLAG, [_STORE_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_STORE_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_STORE_ATTR_SLOT] = HAS_ESCAPES_FLAG, [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG, [_COMPARE_OP_INT] = HAS_ARG_FLAG, diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index a2506524f0bb6dc..c648b5a5d228c68 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -11679,16 +11679,17 @@ value = stack_pointer[-2]; uint16_t index = read_u16(&this_instr[4].cache); PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - if (!LOCK_OBJECT(owner_o)) { - UPDATE_MISS_STATS(STORE_ATTR); - assert(_PyOpcode_Deopt[opcode] == (STORE_ATTR)); - JUMP_TO_PREDICTED(STORE_ATTR); - } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *old_value = _Py_atomic_exchange_ptr( + (void **)addr, PyStackRef_AsPyObjectSteal(value)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else PyObject *old_value = *(PyObject **)addr; - FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); - UNLOCK_OBJECT(owner_o); + *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value); + #endif o = owner; stack_pointer[-2] = o; stack_pointer += -1; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f7487c7136962f1..53105ad23395f75 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3225,14 +3225,27 @@ dummy_func( op(_STORE_ATTR_SLOT, (index/1, value, owner -- o)) { PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - DEOPT_IF(!LOCK_OBJECT(owner_o)); char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); + // Lock-free read-modify-write on the slot pointer (free-threaded + // build). Atomic exchange returns the previous value and stores + // the new one in a single atomic op. Each concurrent writer + // observes a unique old value (so no double-decref), and + // concurrent LOAD_ATTR_SLOT readers already cope with mid-flight + // mutations via _Py_TryIncrefCompare. The C-API path + // (PyMember_SetOne for Py_T_OBJECT_EX / _Py_T_OBJECT) is updated + // to the same atomic-exchange pattern so the two paths stay + // consistent without a per-object lock. On the GIL build this + // compiles to a plain read+write. +#ifdef Py_GIL_DISABLED + PyObject *old_value = _Py_atomic_exchange_ptr( + (void **)addr, PyStackRef_AsPyObjectSteal(value)); +#else PyObject *old_value = *(PyObject **)addr; - FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); - UNLOCK_OBJECT(owner_o); - INPUTS_DEAD(); + *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value); +#endif o = owner; + DEAD(owner); Py_XDECREF(old_value); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index efa61d7de74e88c..fb20e44cb57de69 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -13066,21 +13066,25 @@ value = _stack_item_0; uint16_t index = (uint16_t)CURRENT_OPERAND0_16(); PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - if (!LOCK_OBJECT(owner_o)) { - UOP_STAT_INC(uopcode, miss); - _tos_cache1 = owner; - _tos_cache0 = value; - SET_CURRENT_CACHED_VALUES(2); - JUMP_TO_JUMP_TARGET(); - } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); + #ifdef Py_GIL_DISABLED + stack_pointer[0] = value; + stack_pointer[1] = owner; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *old_value = _Py_atomic_exchange_ptr( + (void **)addr, PyStackRef_AsPyObjectSteal(value)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else PyObject *old_value = *(PyObject **)addr; - FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); - UNLOCK_OBJECT(owner_o); + *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value); + stack_pointer += 2; + #endif o = owner; - stack_pointer[0] = o; - stack_pointer += 1; + stack_pointer[-2] = o; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); Py_XDECREF(old_value); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 53e09a8f4523c7c..d4a7fdb4dea6e14 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -11676,16 +11676,17 @@ value = stack_pointer[-2]; uint16_t index = read_u16(&this_instr[4].cache); PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - if (!LOCK_OBJECT(owner_o)) { - UPDATE_MISS_STATS(STORE_ATTR); - assert(_PyOpcode_Deopt[opcode] == (STORE_ATTR)); - JUMP_TO_PREDICTED(STORE_ATTR); - } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *old_value = _Py_atomic_exchange_ptr( + (void **)addr, PyStackRef_AsPyObjectSteal(value)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else PyObject *old_value = *(PyObject **)addr; - FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); - UNLOCK_OBJECT(owner_o); + *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value); + #endif o = owner; stack_pointer[-2] = o; stack_pointer += -1; diff --git a/Python/structmember.c b/Python/structmember.c index adea8216b8796b5..9eb18f14a286c36 100644 --- a/Python/structmember.c +++ b/Python/structmember.c @@ -97,36 +97,33 @@ PyMember_GetOne(const char *obj_addr, PyMemberDef *l) break; } case _Py_T_OBJECT: - v = FT_ATOMIC_LOAD_PTR(*(PyObject **) addr); - if (v != NULL) { #ifdef Py_GIL_DISABLED - if (!_Py_TryIncrefCompare((PyObject **) addr, v)) { - Py_BEGIN_CRITICAL_SECTION((PyObject *) obj_addr); - v = FT_ATOMIC_LOAD_PTR(*(PyObject **) addr); - Py_XINCREF(v); - Py_END_CRITICAL_SECTION(); - } + // _Py_XGetRef does an atomic load + try-incref loop, retrying when + // a concurrent writer (lock-free atomic exchange in _STORE_ATTR_SLOT + // and PyMember_SetOne) mutates the slot. The previous critical- + // section fallback assumed writers also took ob_mutex, which they no + // longer do, so a plain Py_XINCREF inside the CS could resurrect a + // refcount-0 (about-to-be-freed) object. + v = _Py_XGetRef((PyObject **)addr); #else - Py_INCREF(v); + v = *(PyObject **)addr; + Py_XINCREF(v); #endif - } if (v == NULL) { - v = Py_None; + v = Py_NewRef(Py_None); } break; case Py_T_OBJECT_EX: +#ifdef Py_GIL_DISABLED + v = _Py_XGetRef((PyObject **)addr); + if (v == NULL) { + PyErr_Format(PyExc_AttributeError, + "'%T' object has no attribute '%s'", + (PyObject *)obj_addr, l->name); + } +#else v = member_get_object(addr, obj_addr, l); -#ifndef Py_GIL_DISABLED Py_XINCREF(v); -#else - if (v != NULL) { - if (!_Py_TryIncrefCompare((PyObject **) addr, v)) { - Py_BEGIN_CRITICAL_SECTION((PyObject *) obj_addr); - v = member_get_object(addr, obj_addr, l); - Py_XINCREF(v); - Py_END_CRITICAL_SECTION(); - } - } #endif break; case Py_T_LONGLONG: @@ -320,11 +317,20 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) break; } case _Py_T_OBJECT: - case Py_T_OBJECT_EX: - Py_BEGIN_CRITICAL_SECTION(obj); + case Py_T_OBJECT_EX: { + // Lock-free swap to match the _STORE_ATTR_SLOT bytecode handler + // (which also uses atomic exchange on Py_GIL_DISABLED builds). + // Atomic exchange gives each concurrent writer a unique old value, + // so the Py_XDECREF below cannot double-free. Concurrent readers + // (PyMember_GetOne, _LOAD_ATTR_SLOT) use atomic loads and the + // TryIncrefCompare pattern to cope with mid-flight mutations. + PyObject *new_v = Py_XNewRef(v); +#ifdef Py_GIL_DISABLED + oldv = (PyObject *)_Py_atomic_exchange_ptr((void **)addr, new_v); +#else oldv = *(PyObject **)addr; - FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, Py_XNewRef(v)); - Py_END_CRITICAL_SECTION(); + *(PyObject **)addr = new_v; +#endif if (v == NULL && oldv == NULL && l->type == Py_T_OBJECT_EX) { // Raise an exception when attempting to delete an already deleted // attribute. @@ -336,6 +342,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) } Py_XDECREF(oldv); break; + } case Py_T_CHAR: { const char *string; Py_ssize_t len;