Skip to content

Commit

Permalink
Add per-PyCodeObject interpreter instruction counter
Browse files Browse the repository at this point in the history
Summary:
Adds a mode to Cinder which disables JIT and then counts how many bytecodes are executed in the interpreter loop per-`PyCodeObject`. This has to be per-`PyCodeObject` as we do not always have access to a `PyFunctionObject` in the interpreter loop.

The goal is to maybe build a JIT-list from this and get some insight into where the interpreter spends its time.

Data can be retrieved using `cinder.get_and_clear_code_interp_cost()`, which as the name implies also clears the data. This allows us to accumulate data over multiple calls without worrying about duplicate attribution.

I've identified the `PyCodeObject`s with a string of the form `<name|qualname>@<file>:<lineno>`. This is not perfect; for example, it's not unique for code generated on the fly (you'll see these with the form `...@<string>:1`). It should be good enough for a JIT-list though. A side-effect of encoding code identifiers using file + line number means it's now MUCH easier to find where most code actually comes from.

Technical notes:
* The architecture is a bit weird, with most of the code living in `pyjit.cpp`, but the Python function being exposed in `cinder.c`. The reason for this is I want the simplicity of using C++ data structures to store the data, but with cycle counting enabled JIT is disabled so the API can't be in `cinderjit`.
* I hope the changes in `ceval.c` are as light-weight as possible. I can't really measure this without something like Swayze though. Thanks to advice from DinoV the cost should be quite well hidden.
* I have not tried to optimize anything outside of `ceval.c` as everything will be running slower anyway with this mode enabled.

Reviewed By: DinoV

Differential Revision: D26783332

fbshipit-source-id: 25fe116
  • Loading branch information
jbower-fb authored and facebook-github-bot committed Jun 1, 2021
1 parent ca5d308 commit 0764b2e
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 19 deletions.
72 changes: 72 additions & 0 deletions Jit/pyjit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "Jit/pyjit.h"

#include "Python.h"
//#include "internal/pycore_pystate.h"
#include "internal/pycore_shadow_frame.h"

#include "Include/internal/pycore_pystate.h"
Expand Down Expand Up @@ -88,6 +89,10 @@ INTERNED_STRINGS(DECLARE_STR)

static double total_compliation_time = 0.0;

// Non-zero when interpreter cost capture is enabled. This is read directly
// from ceval.c (through an extern declaration) to minimize overhead.
int g_capture_interp_cost = 0;
// Accumulated interpreter cost per code object, keyed by the identity string
// built in key_for_py_code_object(). Handed out and emptied by
// _PyJIT_GetAndClearCodeInterpCost().
static std::unordered_map<std::string, long> g_code_interp_cost;

struct CompilationTimer {
explicit CompilationTimer(BorrowedRef<PyFunctionObject> f)
: start(std::chrono::steady_clock::now()), func(f) {}
Expand Down Expand Up @@ -529,6 +534,24 @@ static PyObject* jit_force_normal_frame(PyObject*, PyObject* func_obj) {
return func_obj;
}

extern "C" {
/*
 * Return the interpreter cost accumulated so far and reset the counters.
 *
 * Returns a new reference to:
 *   - None, if interpreter cost capture is not enabled;
 *   - a dict mapping each code-object key string to its accumulated cost.
 * Returns NULL with a Python exception set if building the dict fails; in
 * that case the accumulated data is left untouched so that a later call can
 * still retrieve it.
 */
PyObject* _PyJIT_GetAndClearCodeInterpCost(void) {
  if (!g_capture_interp_cost) {
    Py_RETURN_NONE;
  }
  PyObject* dict = PyDict_New();
  if (dict == NULL) {
    return NULL;
  }
  for (const auto& entry : g_code_interp_cost) {
    PyObject* cost = PyLong_FromLong(entry.second);
    if (cost == NULL) {
      Py_DECREF(dict);
      return NULL;
    }
    // PyDict_SetItemString() takes its own reference to the value, so ours
    // must be released whether or not the insertion succeeded.
    int status = PyDict_SetItemString(dict, entry.first.c_str(), cost);
    Py_DECREF(cost);
    if (status < 0) {
      Py_DECREF(dict);
      return NULL;
    }
  }
  g_code_interp_cost.clear();
  return dict;
}
}

static PyMethodDef jit_methods[] = {
{"disable",
(PyCFunction)(void*)disable_jit,
Expand Down Expand Up @@ -809,6 +832,17 @@ int _PyJIT_Initialize() {
}
}

if (_is_flag_set("jit-capture-interp-cost", "PYTHONJITCAPTUREINTERPCOST")) {
if (use_jit) {
use_jit = 0;
JIT_LOG("Keeping JIT disabled to capture interpreter cost.");
}
g_capture_interp_cost = 1;
// Hack to help mitigate the cost of tracing during normal production. See
// ceval.c where the cost counting happens for more details.
_PyRuntime.ceval.tracing_possible++;
}

if (use_jit) {
JIT_DLOG("Enabling JIT.");
} else {
Expand Down Expand Up @@ -876,6 +910,44 @@ int _PyJIT_Initialize() {
return 0;
}

/*
 * Build the identity string for a code object, in the form
 * "<qualname or name>@<filename>:<first line number>".
 *
 * A component that cannot be converted to UTF-8 is rendered as "<unknown>"
 * instead of being dereferenced as a NULL pointer. The Python error indicator
 * is left exactly as it was on entry: this helper may run while an exception
 * is already pending, and it must neither clobber that exception nor leave a
 * new one behind.
 */
static std::string key_for_py_code_object(PyCodeObject* code) {
  // Stash whatever exception is currently set so that the conversions below
  // run with a clean error indicator.
  PyObject* saved_type;
  PyObject* saved_value;
  PyObject* saved_tb;
  PyErr_Fetch(&saved_type, &saved_value, &saved_tb);

  auto utf8_or_unknown = [](PyObject* unicode) -> std::string {
    Py_ssize_t len = 0;
    const char* data =
        unicode != NULL ? PyUnicode_AsUTF8AndSize(unicode, &len) : NULL;
    if (data == NULL) {
      // Conversion failed (or there was nothing to convert); discard the
      // error raised by the conversion and fall back to a placeholder.
      PyErr_Clear();
      return "<unknown>";
    }
    return std::string{data, static_cast<std::string::size_type>(len)};
  };

  PyObject* py_name = code->co_qualname ? code->co_qualname : code->co_name;
  std::string name = utf8_or_unknown(py_name);
  std::string filename = utf8_or_unknown(code->co_filename);

  // Reinstate the caller's exception state (or the absence of one).
  PyErr_Restore(saved_type, saved_value, saved_tb);

  return fmt::format("{}@{}:{}", name, filename, code->co_firstlineno);
}

// Cache of the identity string computed for each code object, indexed by the
// code object's address. Entries are removed by _PyJIT_InvalidateCodeKey().
static std::unordered_map<PyCodeObject*, std::string> g_code_key_cache_;

// Forget the cached identity string for `code`. Does nothing unless
// interpreter cost capture is enabled.
void _PyJIT_InvalidateCodeKey(PyCodeObject* code) {
  if (g_capture_interp_cost) {
    g_code_key_cache_.erase(code);
  }
}

// Add `cost` to the running total recorded for `code`.
//
// The identity string for `code` is computed once and cached by pointer; the
// total itself is stored under that string.
void _PyJIT_BumpCodeInterpCost(PyCodeObject* code, long cost) {
  auto cached = g_code_key_cache_.find(code);
  if (cached == g_code_key_cache_.end()) {
    // First time this pointer is seen: build the key and remember it.
    cached =
        g_code_key_cache_.emplace(code, key_for_py_code_object(code)).first;
  }
  // Use the cached string in place rather than copying it on every call.
  // operator[] value-initializes a missing total to 0 before the addition.
  g_code_interp_cost[cached->second] += cost;
}

// Returns 1 when the JIT has finished initializing and is currently enabled,
// 0 otherwise.
int _PyJIT_IsEnabled() {
  if (jit_config.init_state != JIT_INITIALIZED) {
    return 0;
  }
  return jit_config.is_enabled ? 1 : 0;
}
Expand Down
13 changes: 13 additions & 0 deletions Jit/pyjit.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ extern "C" {
*/
PyAPI_FUNC(int) _PyJIT_Initialize(void);

/*
 * Notify the interpreter cost calculator that a PyCodeObject is being
 * released. The same pointer may later be reused for a different code object,
 * which needs a new string identity key, so any key cached for this pointer
 * is dropped. This is a no-op unless interpreter cost capture is enabled.
 */
PyAPI_FUNC(void) _PyJIT_InvalidateCodeKey(PyCodeObject* code);

/*
 * Used by the interpreter to attribute runtime "cost" to code objects: adds
 * `cost` to the running total recorded for `code`. This function does not
 * itself check whether interpreter cost counting is enabled; the interpreter
 * loop is expected to call it only when it is.
 */
PyAPI_FUNC(void) _PyJIT_BumpCodeInterpCost(PyCodeObject* code, long cost);

/*
* Enable the global JIT.
*
Expand Down
12 changes: 12 additions & 0 deletions Modules/cinder.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,13 @@ set_qualname_of_code(PyObject *Py_UNUSED(module), PyObject **args, Py_ssize_t na
Py_RETURN_NONE;
}

/* Implemented in Jit/pyjit.cpp. */
PyAPI_FUNC(PyObject*) _PyJIT_GetAndClearCodeInterpCost(void);

/* Python-visible wrapper: forwards to the implementation and returns its
   result unchanged. Neither argument is used. */
static PyObject*
get_and_clear_code_interp_cost(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(ignored)) {
    PyObject *costs = _PyJIT_GetAndClearCodeInterpCost();
    return costs;
}

static struct PyMethodDef cinder_module_methods[] = {
{"setknobs", cinder_setknobs, METH_O, setknobs_doc},
{"getknobs", cinder_getknobs, METH_NOARGS, getknobs_doc},
Expand Down Expand Up @@ -366,6 +373,11 @@ static struct PyMethodDef cinder_module_methods[] = {
(PyCFunction)set_qualname_of_code,
METH_FASTCALL,
"Sets the value of qualified name in code object"},
{"get_and_clear_code_interp_cost",
get_and_clear_code_interp_cost,
METH_NOARGS,
"Get and clear accumulated interpreter cost for code objects."},

{NULL, NULL} /* sentinel */
};

Expand Down
3 changes: 3 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "clinic/codeobject.c.h"
#include "Jit/pyjit.h"

#include "Jit/pyjit.h"

/* Holder for co_extra information */
typedef struct {
Py_ssize_t ce_size;
Expand Down Expand Up @@ -517,6 +519,7 @@ code_dealloc(PyCodeObject *co)
PyMem_Free(co_extra);
}
_PyShadow_ClearCache((PyObject *)co); /* facebook t39538061 */
_PyJIT_InvalidateCodeKey(co);

Py_XDECREF(co->co_code);
Py_XDECREF(co->co_consts);
Expand Down
51 changes: 32 additions & 19 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ int32_t __strobe_PyVersion_micro = PY_MICRO_VERSION;
extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
extern PyObject * _PySuper_Lookup(PyTypeObject *type, PyObject *obj, PyObject *name, PyObject *super_instance, int *meth_found);

// Exposed directly from pyjit.cpp to minimize overhead.
extern int g_capture_interp_cost;


/* Begin FB (T37304853) */
#ifdef WITH_DTRACE
Expand Down Expand Up @@ -958,6 +961,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
_Py_atomic_int * const eval_breaker = &ceval->eval_breaker;
PyCodeObject *co;
_PyShadowFrame shadow_frame;
long code_cost = 0;

/* when tracing we set things up so that
Expand Down Expand Up @@ -1462,26 +1466,31 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)

/* line-by-line tracing support */

if (_Py_TracingPossible(ceval) &&
tstate->c_tracefunc != NULL && !tstate->tracing) {
int err;
/* see maybe_call_line_trace
for expository comments */
f->f_stacktop = stack_pointer;

err = maybe_call_line_trace(tstate->c_tracefunc,
tstate->c_traceobj,
tstate, f,
&instr_lb, &instr_ub, &instr_prev);
/* Reload possibly changed frame fields */
JUMPTO(f->f_lasti);
if (f->f_stacktop != NULL) {
stack_pointer = f->f_stacktop;
f->f_stacktop = NULL;
if (_Py_TracingPossible(ceval)) {
/* Guarding the interpreter cost counting in _Py_TracingPossible is
a hack to hint to the compiler/PGO this isn't on the hot/default
path for production. */
code_cost++;
if (tstate->c_tracefunc != NULL && !tstate->tracing) {
int err;
/* see maybe_call_line_trace
for expository comments */
f->f_stacktop = stack_pointer;

err = maybe_call_line_trace(tstate->c_tracefunc,
tstate->c_traceobj,
tstate, f,
&instr_lb, &instr_ub, &instr_prev);
/* Reload possibly changed frame fields */
JUMPTO(f->f_lasti);
if (f->f_stacktop != NULL) {
stack_pointer = f->f_stacktop;
f->f_stacktop = NULL;
}
if (err)
/* trace function raised an exception */
goto error;
}
if (err)
/* trace function raised an exception */
goto error;
}

/* Begin FB (T37304853) */
Expand Down Expand Up @@ -5825,6 +5834,10 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
co->co_cache.curcalls--;
_PyShadowFrame_Pop(tstate, &shadow_frame);

if (g_capture_interp_cost) {
_PyJIT_BumpCodeInterpCost(f->f_code, code_cost);
}

return _Py_CheckFunctionResult(tstate, NULL, retval, "PyEval_EvalFrameEx");
}

Expand Down

0 comments on commit 0764b2e

Please sign in to comment.