Skip to content

Commit

Permalink
pythonGH-113853: Guarantee forward progress in executors (pythonGH-11…
Browse files Browse the repository at this point in the history
  • Loading branch information
markshannon authored and aisk committed Feb 11, 2024
1 parent d767935 commit 668ffc2
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 74 deletions.
2 changes: 1 addition & 1 deletion Include/cpython/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);

int
_PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer);
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);

extern _PyOptimizerObject _PyOptimizer_Default;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Guarantee that all executors make progress. This then guarantees that tier 2
execution always makes progress.
18 changes: 13 additions & 5 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2325,12 +2325,18 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
oparg >>= 8;
start--;
}
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
// Rewind and enter the executor:
assert(this_instr->op.code == ENTER_EXECUTOR);
next_instr = this_instr;
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
}
else {
Expand Down Expand Up @@ -2370,10 +2376,12 @@ dummy_func(
GOTO_TIER_TWO();
}
else {
code->co_executors->executors[oparg & 255] = NULL;
/* ENTER_EXECUTOR will be the first code unit of the instruction */
assert(oparg < 256);
code->co_executors->executors[oparg] = NULL;
opcode = this_instr->op.code = executor->vm_data.opcode;
this_instr->op.arg = executor->vm_data.oparg;
oparg = (oparg & (~255)) | executor->vm_data.oparg;
oparg = executor->vm_data.oparg;
Py_DECREF(executor);
next_instr = this_instr;
DISPATCH_GOTO();
Expand Down
18 changes: 13 additions & 5 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

155 changes: 92 additions & 63 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
}

int
_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
{
assert(src->op.code == JUMP_BACKWARD);
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
assert(PyCode_Check(code));
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!has_space_for_executor(code, src)) {
if (!has_space_for_executor(code, start)) {
return 0;
}
_PyOptimizerObject *opt = interp->optimizer;
_PyExecutorObject *executor = NULL;
/* Start optimizing at the destination to guarantee forward progress */
int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
if (err <= 0) {
assert(executor == NULL);
return err;
}
int index = get_index_for_executor(code, src);
int index = get_index_for_executor(code, start);
if (index < 0) {
/* Out of memory. Don't raise and assume that the
* error will show up elsewhere.
Expand All @@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
Py_DECREF(executor);
return 0;
}
insert_executor(code, src, index, executor);
insert_executor(code, start, index, executor);
Py_DECREF(executor);
return 1;
}
Expand Down Expand Up @@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333

/* Returns 1 on success,
* 0 if it failed to produce a worthwhile trace,
* and -1 on an error.
*/
static int
translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int buffer_size,
_PyBloomFilter *dependencies)
{
PyCodeObject *initial_code = code;
_Py_BloomFilter_Add(dependencies, initial_code);
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
struct {
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions

#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
#endif

#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { printf(__VA_ARGS__); }
Expand Down Expand Up @@ -403,13 +369,47 @@ translate_bytecode_to_trace(
code = trace_stack[trace_stack_depth].code; \
instr = trace_stack[trace_stack_depth].instr;

/* Returns 1 on success,
* 0 if it failed to produce a worthwhile trace,
* and -1 on an error.
*/
static int
translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int buffer_size,
_PyBloomFilter *dependencies)
{
bool progress_needed = true;
PyCodeObject *initial_code = code;
_Py_BloomFilter_Add(dependencies, initial_code);
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
struct {
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions

#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
#endif

DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
uint32_t target = 0;

top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
Expand All @@ -421,6 +421,15 @@ translate_bytecode_to_trace(
uint32_t oparg = instr->op.arg;
uint32_t extended = 0;

if (opcode == ENTER_EXECUTOR) {
assert(oparg < 256);
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = executor->vm_data.oparg;
}

if (opcode == EXTENDED_ARG) {
instr++;
extended = 1;
Expand All @@ -431,13 +440,23 @@ translate_bytecode_to_trace(
goto done;
}
}

if (opcode == ENTER_EXECUTOR) {
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);

/* Special case the first instruction,
* so that we can guarantee forward progress */
if (progress_needed) {
progress_needed = false;
if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
initial_instr = instr;
continue;
}
else {
if (OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_DEOPT(opcode));
}
}

switch (opcode) {
Expand Down Expand Up @@ -480,7 +499,9 @@ translate_bytecode_to_trace(
case JUMP_BACKWARD:
case JUMP_BACKWARD_NO_INTERRUPT:
{
if (instr + 2 - oparg == initial_instr && code == initial_code) {
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
if (target == initial_instr) {
/* We have looped round to the start */
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
}
Expand Down Expand Up @@ -641,35 +662,33 @@ translate_bytecode_to_trace(
}
assert(code == initial_code);
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
if (trace_length > 4) {
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
DPRINTF(1,
"Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
trace_length);
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
return 1;
}
else {
if (progress_needed || trace_length < 5) {
OPT_STAT_INC(trace_too_short);
DPRINTF(4,
"No trace for %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
return 0;
}
return 0;
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
DPRINTF(1,
"Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
trace_length);
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
return 1;
}

#undef RESERVE
#undef RESERVE_RAW
#undef INSTR_IP
#undef ADD_TO_TRACE
#undef DPRINTF
}

#define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
Expand Down Expand Up @@ -854,10 +873,20 @@ counter_optimize(
int Py_UNUSED(curr_stackentries)
)
{
int oparg = instr->op.arg;
while (instr->op.code == EXTENDED_ARG) {
instr++;
oparg = (oparg << 8) | instr->op.arg;
}
if (instr->op.code != JUMP_BACKWARD) {
/* Counter optimizer can only handle backward edges */
return 0;
}
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
_PyUOpInstruction buffer[3] = {
{ .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
};
_PyBloomFilter empty;
_Py_BloomFilter_Init(&empty);
Expand Down

0 comments on commit 668ffc2

Please sign in to comment.