Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build Parsing.py in the limited api #5845

Merged
merged 18 commits into from Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 10 additions & 6 deletions Cython/Compiler/ExprNodes.py
Expand Up @@ -14155,13 +14155,14 @@ class CoerceToBooleanNode(CoercionNode):

type = PyrexTypes.c_bint_type

# Note that all of these need a check if CYTHON_ASSUME_SAFE_MACROS is false
_special_builtins = {
Builtin.list_type: 'PyList_GET_SIZE',
Builtin.tuple_type: 'PyTuple_GET_SIZE',
Builtin.set_type: 'PySet_GET_SIZE',
Builtin.frozenset_type: 'PySet_GET_SIZE',
Builtin.bytes_type: 'PyBytes_GET_SIZE',
Builtin.bytearray_type: 'PyByteArray_GET_SIZE',
Builtin.list_type: '__Pyx_PyList_GET_SIZE',
Builtin.tuple_type: '__Pyx_PyTuple_GET_SIZE',
Builtin.set_type: '__Pyx_PySet_GET_SIZE',
Builtin.frozenset_type: '__Pyx_PySet_GET_SIZE',
Builtin.bytes_type: '__Pyx_PyBytes_GET_SIZE',
Builtin.bytearray_type: '__Pyx_PyByteArray_GET_SIZE',
Builtin.unicode_type: '__Pyx_PyUnicode_IS_TRUE',
}

Expand Down Expand Up @@ -14193,6 +14194,9 @@ def generate_result_code(self, code):
checks = ["(%s != Py_None)" % self.arg.py_result()] if self.arg.may_be_none() else []
checks.append("(%s(%s) != 0)" % (test_func, self.arg.py_result()))
code.putln("%s = %s;" % (self.result(), '&&'.join(checks)))
code.putln("#if !CYTHON_ASSUME_SAFE_MACROS")
code.putln(code.error_goto_if_neg(self.result(), self.pos))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunate, because it may lead to an unused label. In rare cases, admittedly…

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can rewrite it as

if ((!CYTHON_ASSUME_SAFE_MACROS) && %result% < 0) goto error

so the label is never unused. That might be slightly better (provided we manage to avoid "condition always true" warnings)

code.putln("#endif")
else:
code.putln(
"%s = __Pyx_PyObject_IsTrue(%s); %s" % (
Expand Down
36 changes: 34 additions & 2 deletions Cython/Utility/Builtins.c
Expand Up @@ -331,16 +331,34 @@ static long __Pyx__PyObject_Ord(PyObject* c); /*proto*/
static long __Pyx__PyObject_Ord(PyObject* c) {
Py_ssize_t size;
if (PyBytes_Check(c)) {
#if CYTHON_ASSUME_SAFE_MACROS
size = PyBytes_GET_SIZE(c);
if (likely(size == 1)) {
return (unsigned char) PyBytes_AS_STRING(c)[0];
}
#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
#else
size = PyBytes_Size(c);
if (unlikely(size) < 0) return -1;
else if (likely(size==1)) {
char *data = PyBytes_AsString(c);
if (unlikely(!data)) return -1;
return (unsigned char) data[0];
}
da-woods marked this conversation as resolved.
Show resolved Hide resolved
#endif
} else if (PyByteArray_Check(c)) {
#if CYTHON_ASSUME_SAFE_MACROS
size = PyByteArray_GET_SIZE(c);
if (likely(size == 1)) {
return (unsigned char) PyByteArray_AS_STRING(c)[0];
}
#else
size = PyByteArray_Size(c);
if (unlikely(size<0)) return -1;
else if (likely(size == 1)) {
char *data = PyByteArray_AsString(c);
if (unlikely(!data)) return -1;
return (unsigned char) data[0];
}
da-woods marked this conversation as resolved.
Show resolved Hide resolved
#endif
} else {
// FIXME: support character buffers - but CPython doesn't support them either
Expand Down Expand Up @@ -475,8 +493,22 @@ static CYTHON_INLINE PyObject* __Pyx_PyFrozenSet_New(PyObject* it) {
result = PyFrozenSet_New(it);
if (unlikely(!result))
return NULL;
if ((PY_VERSION_HEX >= 0x031000A1) || likely(PySet_GET_SIZE(result)))
if ((__PYX_LIMITED_VERSION_HEX >= 0x031000A1))
return result;
{
Py_ssize_t size;
#if CYTHON_ASSUME_SAFE_MACROS
size = PySet_GET_SIZE(result);
#else
size = PySet_Size(result);
if (size < 0) {
da-woods marked this conversation as resolved.
Show resolved Hide resolved
Py_DECREF(result);
return NULL;
}
#endif
if (likely(size))
return result;
}
// empty frozenset is a singleton (on Python <3.10)
// seems wasteful, but CPython does the same
Py_DECREF(result);
Expand Down
39 changes: 37 additions & 2 deletions Cython/Utility/ObjectHandling.c
Expand Up @@ -52,9 +52,17 @@ static void __Pyx_UnpackTupleError(PyObject *, Py_ssize_t index); /*proto*/
static void __Pyx_UnpackTupleError(PyObject *t, Py_ssize_t index) {
if (t == Py_None) {
__Pyx_RaiseNoneNotIterableError();
} else if (PyTuple_GET_SIZE(t) < index) {
__Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(t));
} else {
#if CYTHON_ASSUME_SAFE_MACROS
Py_ssize_t size = PyTuple_GET_SIZE(t);
#else
Py_ssize_t size = PyTuple_Size(t);
if (unlikely(size < 0)) return;
#endif
if (size < index) {
__Pyx_RaiseNeedMoreValuesError(size);
return;
}
__Pyx_RaiseTooManyValuesError(index);
da-woods marked this conversation as resolved.
Show resolved Hide resolved
}
}
Expand All @@ -79,12 +87,17 @@ static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {

/////////////// UnpackTuple2.proto ///////////////

#if CYTHON_ASSUME_SAFE_MACROS
#define __Pyx_unpack_tuple2(tuple, value1, value2, is_tuple, has_known_size, decref_tuple) \
(likely(is_tuple || PyTuple_Check(tuple)) ? \
(likely(has_known_size || PyTuple_GET_SIZE(tuple) == 2) ? \
__Pyx_unpack_tuple2_exact(tuple, value1, value2, decref_tuple) : \
(__Pyx_UnpackTupleError(tuple, 2), -1)) : \
__Pyx_unpack_tuple2_generic(tuple, value1, value2, has_known_size, decref_tuple))
#else
static CYTHON_INLINE int __Pyx_unpack_tuple2(
PyObject* tuple, PyObject** value1, PyObject** value2, int is_tuple, int has_known_size, int decref_tuple);
#endif
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems worth generally unfolding this lengthy macro into the inline function you wrote, and just special casing the size check.


static CYTHON_INLINE int __Pyx_unpack_tuple2_exact(
PyObject* tuple, PyObject** value1, PyObject** value2, int decref_tuple);
Expand All @@ -96,6 +109,28 @@ static int __Pyx_unpack_tuple2_generic(
//@requires: UnpackTupleError
//@requires: RaiseNeedMoreValuesToUnpack

#if !CYTHON_ASSUME_SAFE_MACROS
static CYTHON_INLINE int __Pyx_unpack_tuple2(
PyObject* tuple, PyObject** value1, PyObject** value2, int is_tuple, int has_known_size, int decref_tuple) {
if (likely(is_tuple || PyTuple_Check(tuple))) {
Py_ssize_t size;
if (has_known_size) {
return __Pyx_unpack_tuple2_exact(tuple, value1, value2, decref_tuple);
}
size = PyTuple_Size(tuple);
if (unlikely(size < 0)) {
return -1;
} else if (likely(size == 2)) {
return __Pyx_unpack_tuple2_exact(tuple, value1, value2, decref_tuple);
}
__Pyx_UnpackTupleError(tuple, 2);
return -1;
} else {
return __Pyx_unpack_tuple2_generic(tuple, value1, value2, has_known_size, decref_tuple);
}
}
#endif

static CYTHON_INLINE int __Pyx_unpack_tuple2_exact(
PyObject* tuple, PyObject** pvalue1, PyObject** pvalue2, int decref_tuple) {
PyObject *value1 = NULL, *value2 = NULL;
Expand Down
42 changes: 38 additions & 4 deletions Cython/Utility/Optimize.c
Expand Up @@ -361,8 +361,20 @@ static CYTHON_INLINE int __Pyx_dict_iter_next(
}
Py_INCREF(key);
Py_INCREF(value);
#if CYTHON_ASSUME_SAFE_MACROS
PyTuple_SET_ITEM(tuple, 0, key);
PyTuple_SET_ITEM(tuple, 1, value);
#else
if (unlikely(PyTuple_SetItem(tuple, 0, key) < 0)) {
Py_DECREF(value); // we haven't set this yet
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We haven't actually set key in this case either. We need to decref both (and value in the failure case below).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PyTuple_SetItem decrefs key on failure (https://github.com/python/cpython/blob/939fc6d6eab9b7ea8c244d513610dbdd556503a7/Objects/tupleobject.c#L122C10-L122C10) so that it effectively steals a reference whether or not it succeeds.

I'll add a comment because it isn't obvious though

Py_DECREF(tuple);
return -1;
}
if (unlikely(PyTuple_SetItem(tuple, 1, value) < 0)) {
Py_DECREF(tuple);
return -1;
}
#endif
*pitem = tuple;
} else {
if (pkey) {
Expand All @@ -376,16 +388,38 @@ static CYTHON_INLINE int __Pyx_dict_iter_next(
}
return 1;
} else if (PyTuple_CheckExact(iter_obj)) {
Py_ssize_t pos = *ppos;
if (unlikely(pos >= PyTuple_GET_SIZE(iter_obj))) return 0;
Py_ssize_t pos = *ppos, iter_size;
#if CYTHON_ASSUME_SAFE_MACROS
iter_size = PyTuple_GET_SIZE(iter_obj);
#else
iter_size = PyTuple_Size(iter_obj);
if (unlikely(iter_size < 0)) return -1;
#endif
if (unlikely(pos >= iter_size)) return 0;
*ppos = pos + 1;
#if CYTHON_ASSUME_SAFE_MACROS
next_item = PyTuple_GET_ITEM(iter_obj, pos);
#else
next_item = PyTuple_GetItem(iter_obj, pos);
if (unlikely(!next_item)) return -1;
#endif
Py_INCREF(next_item);
} else if (PyList_CheckExact(iter_obj)) {
Py_ssize_t pos = *ppos;
if (unlikely(pos >= PyList_GET_SIZE(iter_obj))) return 0;
Py_ssize_t pos = *ppos, iter_size;
#if CYTHON_ASSUME_SAFE_MACROS
iter_size = PyList_GET_SIZE(iter_obj);
#else
iter_size = PyList_Size(iter_obj);
if (unlikely(iter_size < 0)) return -1;
#endif
if (unlikely(pos >= iter_size)) return 0;
*ppos = pos + 1;
#if CYTHON_ASSUME_SAFE_MACROS
next_item = PyList_GET_ITEM(iter_obj, pos);
#else
next_item = PyList_GetItem(iter_obj, pos);
if (unlikely(!next_item)) return -1;
#endif
Py_INCREF(next_item);
} else
#endif
Expand Down
18 changes: 17 additions & 1 deletion Cython/Utility/StringTools.c
Expand Up @@ -75,7 +75,7 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch

//////////////////// PyUCS4InUnicode ////////////////////

#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
#if (PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE)) && !CYTHON_COMPILING_IN_LIMITED_API)

#if PY_VERSION_HEX < 0x03090000
#define __Pyx_PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(op)
Expand Down Expand Up @@ -113,6 +113,14 @@ static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t
#endif

static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
#if CYTHON_COMPILING_IN_LIMITED_API
// Note that from Python 3.7, the indices of FindChar account for wraparound so no
// need to check the length
Py_ssize_t idx = PyUnicode_FindChar(unicode, character, 0, -1, 1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could also pass PY_SSIZE_T_MAX as end to avoid the risk of an off-by-one error for -1 (because, is that inclusive or exclusive?).

Given that there's a C-API function for this, though, I wonder if we're really still faster than that. The implementation seems quite efficient. From my expecience, PyUnicode_READ() isn't the fastest thing to use, compared to a straight memory loop.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've taken this to mean "just use PyUnicode_FindChar in all cases". Happy to revert if this wasn't what you mean though!

if (idx == -1) return 0; // not found
else if (unlikely(idx < 0)) return -1; // error
else return 1; // found
da-woods marked this conversation as resolved.
Show resolved Hide resolved
#else
const int kind = PyUnicode_KIND(unicode);
#ifdef PyUnicode_WCHAR_KIND
if (likely(kind != PyUnicode_WCHAR_KIND))
Expand Down Expand Up @@ -144,6 +152,7 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch

}
#endif
#endif
}


Expand Down Expand Up @@ -550,12 +559,18 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytearray(

/////////////// PyUnicode_Substring.proto ///////////////

#if !CYTHON_COMPILING_IN_LIMITED_API
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
PyObject* text, Py_ssize_t start, Py_ssize_t stop);
#else
// In the limited API since 3.7
#define __Pyx_PyUnicode_Substring(text, start, stop) PyUnicode_Substring(text, start, stop)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't quite right - the __Pyx_PyUnicode_Substring function handles negative indices and PyUnicode_Substring doesn't. Will fix

#endif

/////////////// PyUnicode_Substring ///////////////
//@substitute: naming

#if !CYTHON_COMPILING_IN_LIMITED_API
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
Py_ssize_t length;
Expand All @@ -577,6 +592,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
}
#endif


/////////////// py_unicode_istitle.proto ///////////////
Expand Down
9 changes: 9 additions & 0 deletions Cython/Utility/TypeConversion.c
Expand Up @@ -576,9 +576,18 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject*);

static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) {
Py_ssize_t length;
#if CYTHON_ASSUME_SAFE_MACROS
length = PyUnicode_GET_LENGTH(x);
#else
length = PyUnicode_GetLength(x);
if (length < 0) return (Py_UCS4)-1;
#endif
if (likely(length == 1)) {
#if CYTHON_ASSUME_SAFE_MACROS
return PyUnicode_READ_CHAR(x, 0);
#else
return PyUnicode_ReadChar(x, 0);
#endif
da-woods marked this conversation as resolved.
Show resolved Hide resolved
}
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UCS4, "
Expand Down