Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use vectorcall by default #5804

Merged
merged 17 commits into from Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
196 changes: 160 additions & 36 deletions Cython/Compiler/ExprNodes.py
Expand Up @@ -5984,6 +5984,16 @@ def analyse_as_type_constructor(self, env):
self.type = type
return True

def function_type(self):
# Return the type of the function being called, coercing a function
# pointer to a function if necessary.
func_type = self.function.type

if func_type.is_ptr:
func_type = func_type.base_type

return func_type

def is_lvalue(self):
return self.type.is_reference

Expand Down Expand Up @@ -6106,17 +6116,6 @@ def analyse_types(self, env):

return self

def function_type(self):
# Return the type of the function being called, coercing a function
# pointer to a function if necessary. If the function has fused
# arguments, return the specific type.
func_type = self.function.type

if func_type.is_ptr:
func_type = func_type.base_type

return func_type

def analyse_c_function_call(self, env):
func_type = self.function.type
if func_type is error_type:
Expand Down Expand Up @@ -6525,15 +6524,18 @@ def generate_evaluation_code(self, code):
", ".join(a.pythran_result() for a in args)))


class PyMethodCallNode(SimpleCallNode):
class PyMethodCallNode(CallNode):
# Specialised call to a (potential) PyMethodObject with non-constant argument tuple.
# Allows the self argument to be injected directly instead of repacking a tuple for it.
#
# function ExprNode the function/method object to call
# arg_tuple TupleNode the arguments for the args tuple
# kwdict ExprNode or None keyword dictionary (if present)
# unpack bool

subexprs = ['function', 'arg_tuple']
subexprs = ['function', 'arg_tuple', 'kwdict']
is_temp = True
kwdict = None

def generate_evaluation_code(self, code):
code.mark_pos(self.pos)
Expand All @@ -6542,8 +6544,15 @@ def generate_evaluation_code(self, code):
self.function.generate_evaluation_code(code)
assert self.arg_tuple.mult_factor is None
args = self.arg_tuple.args
kwargs_key_value_pairs = None
for arg in args:
arg.generate_evaluation_code(code)
if isinstance(self.kwdict, DictNode):
kwargs_key_value_pairs = self.kwdict.key_value_pairs
for keyvalue in kwargs_key_value_pairs:
keyvalue.generate_evaluation_code(code)
elif self.kwdict:
self.kwdict.generate_evaluation_code(code)

# make sure function is in temp so that we can replace the reference below if it's a method
reuse_function_temp = self.function.is_temp
Expand Down Expand Up @@ -6582,49 +6591,106 @@ def attribute_is_likely_method(attr):
else:
likely_method = 'unlikely'

code.putln("#if CYTHON_UNPACK_METHODS")
code.putln("if (%s(PyMethod_Check(%s))) {" % (likely_method, function))
code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function))
# the following is always true in Py3 (kept only for safety),
# but is false for unbound methods in Py2
code.putln("if (likely(%s)) {" % self_arg)
code.putln("PyObject* function = PyMethod_GET_FUNCTION(%s);" % function)
code.put_incref(self_arg, py_object_type)
code.put_incref("function", py_object_type)
# free method object as early to possible to enable reuse from CPython's freelist
code.put_decref_set(function, py_object_type, "function")
code.putln("%s = 1;" % arg_offset_cname)
code.putln("}")
code.putln("}")
code.putln("#endif") # CYTHON_UNPACK_METHODS
# TODO may need to deal with unused variables in the #else case
if self.unpack:
# unpack is ultimately governed by optimize.unpack_method_calls
# and is a separate decision to whether we want vectorcall-type behaviour
code.putln("#if CYTHON_UNPACK_METHODS")
code.putln("if (%s(PyMethod_Check(%s))) {" % (likely_method, function))
code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function))
# the result of PyMethod_GET_SELF is always true in Py3.
code.putln(f"assert({self_arg});")
code.putln("PyObject* function = PyMethod_GET_FUNCTION(%s);" % function)
code.put_incref(self_arg, py_object_type)
code.put_incref("function", py_object_type)
# free method object as early to possible to enable reuse from CPython's freelist
code.put_decref_set(function, py_object_type, "function")
code.putln("%s = 1;" % arg_offset_cname)
code.putln("}")
code.putln("#endif") # CYTHON_UNPACK_METHODS
# TODO may need to deal with unused variables in the #else case

kwnames_temp = None
if kwargs_key_value_pairs:
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectVectorCallKwBuilder", "ObjectHandling.c"))
function_caller = "__Pyx_Object_Vectorcall_CallFromBuilder"
kwnames_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
code.putln("%s = __Pyx_MakeVectorcallBuilderKwds(%s); %s" % (
kwnames_temp, len(kwargs_key_value_pairs),
code.error_goto_if_null(kwnames_temp, self.pos)
))
code.put_gotref(kwnames_temp, py_object_type)
elif self.kwdict:
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectFastCall", "ObjectHandling.c"))
function_caller = "__Pyx_PyObject_FastCallDict"
else:
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectFastCall", "ObjectHandling.c"))
function_caller = "__Pyx_PyObject_FastCall"
# actually call the function
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectFastCall", "ObjectHandling.c"))


code.putln("{")
extra_keyword_args = ""
if kwargs_key_value_pairs:
extra_keyword_args = f"+ ((CYTHON_VECTORCALL) ? {len(kwargs_key_value_pairs)} : 0)"
# To avoid passing an out-of-bounds argument pointer in the no-args case,
# we need at least two entries, so we pad with NULL and point to that.
# See https://github.com/cython/cython/issues/5668
code.putln("PyObject *__pyx_callargs[%d] = {%s, %s};" % (
code.putln("PyObject *__pyx_callargs[%d%s] = {%s, %s};" % (
(len(args) + 1) if args else 2,
extra_keyword_args,
self_arg,
', '.join(arg.py_result() for arg in args) if args else "NULL",
))
code.putln("%s = __Pyx_PyObject_FastCall(%s, __pyx_callargs+1-%s, %d+%s);" % (
if kwargs_key_value_pairs:
for n, keyvalue in enumerate(kwargs_key_value_pairs):
key_is_str = (
(keyvalue.key.type is Builtin.str_type or keyvalue.key.type is Builtin.unicode_type)
and not keyvalue.key.may_be_none()
)
code.put_error_if_neg(
self.pos,
"__Pyx_VectorcallBuilder_AddArg%s(%s, %s, %s, __pyx_callargs+%d, %d)" % (
"" if key_is_str else "_Check",
keyvalue.key.py_result(),
keyvalue.value.py_result(),
kwnames_temp,
len(args) + 1,
n
))

if kwnames_temp:
keyword_variable = f", {kwnames_temp}"
elif self.kwdict:
keyword_variable = f", {self.kwdict.result()}"
else:
keyword_variable = ""
code.putln("%s = %s(%s, __pyx_callargs+1-%s, %d+%s%s);" % (
self.result(),
function_caller,
function,
arg_offset_cname,
len(args),
arg_offset_cname))
arg_offset_cname,
keyword_variable))

code.put_xdecref_clear(self_arg, py_object_type)
code.funcstate.release_temp(self_arg)
code.funcstate.release_temp(arg_offset_cname)
for arg in args:
arg.generate_disposal_code(code)
arg.free_temps(code)
if kwargs_key_value_pairs:
for keyvalue in kwargs_key_value_pairs:
keyvalue.generate_disposal_code(code)
keyvalue.free_temps(code)
code.put_decref_clear(kwnames_temp, py_object_type)
code.funcstate.release_temp(kwnames_temp)
elif self.kwdict:
self.kwdict.generate_disposal_code(code)
self.kwdict.free_temps(code)
code.putln(code.error_goto_if_null(self.result(), self.pos))
self.generate_gotref(code)

Expand All @@ -6636,6 +6702,51 @@ def attribute_is_likely_method(attr):
code.funcstate.release_temp(function)
code.putln("}")

@staticmethod
def can_be_used_for_posargs(positional_args, has_kwargs, kwds_is_dict_node=None):
"""
Test whether the positional args given are compatible with
being translated into a PyMethodCallNode
"""
if not isinstance(positional_args, TupleNode):
return False
if positional_args.mult_factor:
return False
if positional_args.is_literal and len(positional_args.args) > 1:
return False
if not len(positional_args.args):
# If positional_args is an empty tuple, it's probably only
# worth optimizing if the kwds are f(a=1, b=2) and not
# if they're f(**kwds)
return has_kwargs and kwds_is_dict_node
return True


@staticmethod
def can_be_used_for_function(function):
"""
Test whether the function passed is suitable to be translated
into a PyMethodCallNode
"""
may_be_a_method = True
if function.type is Builtin.type_type:
may_be_a_method = False
elif function.is_attribute:
if function.entry and function.entry.type.is_cfunction:
# optimised builtin method
may_be_a_method = False
elif function.is_name:
entry = function.entry
if entry.is_builtin or entry.type.is_cfunction:
may_be_a_method = False
elif entry.cf_assignments:
# local functions/classes are definitely not methods
non_method_nodes = (PyCFunctionNode, ClassNode, Py3ClassNode)
may_be_a_method = any(
assignment.rhs and not isinstance(assignment.rhs, non_method_nodes)
for assignment in entry.cf_assignments)
return may_be_a_method


class InlinedDefNodeCallNode(CallNode):
# Inline call to defnode
Expand Down Expand Up @@ -7153,12 +7264,25 @@ def generate_evaluation_code(self, code):
code.putln("%s = %s;" % (self.result(), item.py_result()))
item.generate_post_assignment_code(code)
else:
if item.is_temp:
# For the fairly plausible special case where item is a temporary
# with a refcount of 1 (so created specifically for us),
# avoid making a copy
code.putln("#if CYTHON_COMPILING_IN_CPYTHON")
code.putln("if (Py_REFCNT(%s) == 1) {" % item.py_result())
code.putln("%s = %s;" % (self.result(), item.py_result()))
item.generate_post_assignment_code(code)
code.putln("} else")
code.putln("#endif")
code.putln("{")
code.putln("%s = PyDict_Copy(%s); %s" % (
self.result(),
item.py_result(),
code.error_goto_if_null(self.result(), item.pos)))
self.generate_gotref(code)
item.generate_disposal_code(code)
if item.is_temp:
code.putln("}")

if item.type is not dict_type:
code.putln('} else {')
Expand Down Expand Up @@ -9153,9 +9277,9 @@ class DictNode(ExprNode):
# Dictionary constructor.
#
# key_value_pairs [DictItemNode]
# exclude_null_values [boolean] Do not add NULL values to dict
# exclude_null_values boolean Do not add NULL values to dict
#
# obj_conversion_errors [PyrexError] used internally
# obj_conversion_errors PyrexError used internally

subexprs = ['key_value_pairs']
is_temp = 1
Expand Down
56 changes: 31 additions & 25 deletions Cython/Compiler/Optimize.py
Expand Up @@ -5048,6 +5048,13 @@ def visit_SingleAssignmentNode(self, node):
lhs.lhs_of_first_assignment = True
return node

def _check_optimize_method_calls(self, node):
function = node.function
return (node.is_temp and function.type.is_pyobject and self.current_directives.get(
"optimize.unpack_method_calls_in_pyinit"
if not self.in_loop and self.current_env().is_module_scope
else "optimize.unpack_method_calls"))

def visit_SimpleCallNode(self, node):
"""
Replace generic calls to isinstance(x, type) by a more efficient type check.
Expand All @@ -5064,38 +5071,37 @@ def visit_SimpleCallNode(self, node):
function.type = function.entry.type
PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
elif (node.is_temp and function.type.is_pyobject and self.current_directives.get(
"optimize.unpack_method_calls_in_pyinit"
if not self.in_loop and self.current_env().is_module_scope
else "optimize.unpack_method_calls")):
else:
# optimise simple Python methods calls
if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
node.arg_tuple.mult_factor or (node.arg_tuple.is_literal and len(node.arg_tuple.args) > 1)):
if ExprNodes.PyMethodCallNode.can_be_used_for_posargs(node.arg_tuple, has_kwargs=False):
# simple call, now exclude calls to objects that are definitely not methods
may_be_a_method = True
if function.type is Builtin.type_type:
may_be_a_method = False
elif function.is_attribute:
if function.entry and function.entry.type.is_cfunction:
# optimised builtin method
may_be_a_method = False
elif function.is_name:
entry = function.entry
if entry.is_builtin or entry.type.is_cfunction:
may_be_a_method = False
elif entry.cf_assignments:
# local functions/classes are definitely not methods
non_method_nodes = (ExprNodes.PyCFunctionNode, ExprNodes.ClassNode, ExprNodes.Py3ClassNode)
may_be_a_method = any(
assignment.rhs and not isinstance(assignment.rhs, non_method_nodes)
for assignment in entry.cf_assignments)
if may_be_a_method:
if ExprNodes.PyMethodCallNode.can_be_used_for_function(function):
if (node.self and function.is_attribute and
isinstance(function.obj, ExprNodes.CloneNode) and function.obj.arg is node.self):
# function self object was moved into a CloneNode => undo
function.obj = function.obj.arg
node = self.replace(node, ExprNodes.PyMethodCallNode.from_node(
node, function=function, arg_tuple=node.arg_tuple, type=node.type))
node, function=function, arg_tuple=node.arg_tuple, type=node.type,
unpack=self._check_optimize_method_calls(node)))
return node

def visit_GeneralCallNode(self, node):
"""
Replace likely Python method calls by a specialised PyMethodCallNode.
"""
self.visitchildren(node)
has_kwargs = bool(node.keyword_args)
kwds_is_dict_node = isinstance(node.keyword_args, ExprNodes.DictNode)
if not ExprNodes.PyMethodCallNode.can_be_used_for_posargs(
node.positional_args, has_kwargs=has_kwargs, kwds_is_dict_node=kwds_is_dict_node):
return node
function = node.function
if not ExprNodes.PyMethodCallNode.can_be_used_for_function(function):
return node

node = self.replace(node, ExprNodes.PyMethodCallNode.from_node(
Comment on lines +5095 to +5102
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the two can_be_… functions are always called before calling from_node(). That seems redundant. Why not let a PyMethodCallNode classmethod try to optimise the call (thus, maybe try_to_optimise_call()), and if it can't, return the original node unchanged?

Feel free to add a if node is (not) replacement short-cut to the replace() method to simplify this further.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm disagreeing with this for now.

There's a good chunk of duplication between SimpleCallNode and GeneralCallNode, but there there's also significant differences (a CloneNode check, the name of the attribute used for the positional arguments, handling of keyword arguments).

So I think the logic need to be duplicated either way, either here or with an isinstance check in PyMethodCallNode. I think it's slightly better here (where the visitor has found the classes for us).

I don't have a hugely strong opinion - it just doesn't look like a significant improvement to me.

node, function=function, arg_tuple=node.positional_args, kwdict=node.keyword_args,
type=node.type, unpack=self._check_optimize_method_calls(node)))
return node

def visit_NumPyMethodCallNode(self, node):
Expand Down
9 changes: 9 additions & 0 deletions Cython/Utility/ModuleSetupCode.c
Expand Up @@ -432,8 +432,17 @@
#endif

#ifndef CYTHON_VECTORCALL
#if CYTHON_COMPILING_IN_LIMITED_API
// Possibly needs a bit of clearing up, however:
// the limited API doesn't define CYTHON_FAST_PYCCALL (because that involves
// a lot of access to internals) but does define CYTHON_VECTORCALL because
// that's available cleanly from Python 3.12. Note that only VectorcallDict isn't
// available though.
#define CYTHON_VECTORCALL (__PYX_LIMITED_VERSION_HEX >= 0x030C0000)
#else
#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1)
#endif
#endif

/* Whether to use METH_FASTCALL with a fake backported implementation of vectorcall */
#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1)
Expand Down