Skip to content

Commit

Permalink
Custom int128 conversion as a slow fallback (cythonGH-5419)
Browse files Browse the repository at this point in the history
* Use a custom (although slow) PyLong -> C int128 conversion if "_PyLong_AsByteArray()" is not available (as in PyPy and the Limited API).
* Avoid the large integer conversion for enum types (where shifts and similar bit operations don't work well).
  • Loading branch information
scoder committed May 15, 2023
1 parent 580ceee commit dec61cd
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 18 deletions.
14 changes: 10 additions & 4 deletions Cython/Compiler/PyrexTypes.py
Expand Up @@ -532,8 +532,11 @@ def create_from_py_utility_code(self, env):
self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name()
env.use_utility_code(TempitaUtilityCode.load_cached(
"CIntFromPy", "TypeConversion.c",
context={"TYPE": self.empty_declaration_code(),
"FROM_PY_FUNCTION": self.from_py_function}))
context={
"TYPE": self.empty_declaration_code(),
"FROM_PY_FUNCTION": self.from_py_function,
"IS_ENUM": base_type.is_enum,
}))
return True
elif base_type.is_float:
pass # XXX implement!
Expand Down Expand Up @@ -2043,8 +2046,11 @@ def create_from_py_utility_code(self, env):
self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name()
env.use_utility_code(TempitaUtilityCode.load_cached(
"CIntFromPy", "TypeConversion.c",
context={"TYPE": self.empty_declaration_code(),
"FROM_PY_FUNCTION": self.from_py_function}))
context={
"TYPE": self.empty_declaration_code(),
"FROM_PY_FUNCTION": self.from_py_function,
"IS_ENUM": self.is_enum,
}))
return True

@staticmethod
Expand Down
117 changes: 107 additions & 10 deletions Cython/Utility/TypeConversion.c
Expand Up @@ -1097,33 +1097,130 @@ static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
#endif
}
}

{{if IS_ENUM}}
PyErr_SetString(PyExc_RuntimeError,
"_PyLong_AsByteArray() not available, cannot convert large enums");
return ({{TYPE}}) -1;
{{else}}
// large integer type and no access to PyLong internals => allow for a more expensive conversion
{
#if (CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) && !defined(_PyLong_AsByteArray)
PyErr_SetString(PyExc_RuntimeError,
"_PyLong_AsByteArray() not available, cannot convert large numbers");
#else
{{TYPE}} val;
PyObject *v = __Pyx_PyNumber_IntOrLong(x);
#if PY_MAJOR_VERSION < 3
#if PY_MAJOR_VERSION < 3
if (likely(v) && !PyLong_Check(v)) {
PyObject *tmp = v;
v = PyNumber_Long(tmp);
Py_DECREF(tmp);
}
#endif
#endif
if (likely(v)) {
int ret = -1;
#if !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray)
int one = 1; int is_little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&val;
int ret = _PyLong_AsByteArray((PyLongObject *)v,
bytes, sizeof(val),
is_little, !is_unsigned);
ret = _PyLong_AsByteArray((PyLongObject *)v,
bytes, sizeof(val),
is_little, !is_unsigned);
#else
// Inefficient copy of bit chunks through the C-API. Probably still better than a "cannot do this" exception.
PyObject *stepval = NULL, *mask = NULL, *shift = NULL;
int bits, remaining_bits, is_negative = 0;
long idigit;
int chunk_size = (sizeof(long) < 8) ? 30 : 62;

// use an exact PyLong to bypass user-defined &/>>/etc. operator implementations
if (unlikely(!PyLong_CheckExact(v))) {
PyObject *tmp = v;
v = PyNumber_Long(v);
assert(PyLong_CheckExact(v));
Py_DECREF(tmp);
if (unlikely(!v)) return ({{TYPE}}) -1;
}

#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000
if (Py_SIZE(x) == 0)
return ({{TYPE}}) 0;
is_negative = Py_SIZE(x) < 0;
#else
{
// misuse Py_False as a quick way to compare to a '0' int object
int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
if (unlikely(result < 0))
return ({{TYPE}}) -1;
is_negative = result == 1;
}
#endif

if (is_unsigned && unlikely(is_negative)) {
goto raise_neg_overflow;
} else if (is_negative) {
// bit-invert to make sure we can safely convert it
stepval = PyNumber_Invert(v);
if (unlikely(!stepval))
return ({{TYPE}}) -1;
} else {
stepval = __Pyx_NewRef(v);
}

// unpack full chunks of bits
val = ({{TYPE}}) 0;
mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done;
shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done;
for (bits = 0; bits < (int) sizeof({{TYPE}}) * 8 - chunk_size; bits += chunk_size) {
PyObject *tmp, *digit;

digit = PyNumber_And(stepval, mask);
if (unlikely(!digit)) goto done;
idigit = PyLong_AsLong(digit);
Py_DECREF(digit);
if (unlikely(idigit < 0)) goto done;

tmp = PyNumber_Rshift(stepval, shift);
if (unlikely(!tmp)) goto done;
Py_DECREF(stepval); stepval = tmp;

val |= (({{TYPE}}) idigit) << bits;

#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000
if (Py_SIZE(stepval) == 0)
goto unpacking_done;
#endif
}

// detect overflow when adding the last bits
idigit = PyLong_AsLong(stepval);
if (unlikely(idigit < 0)) goto done;
remaining_bits = ((int) sizeof({{TYPE}}) * 8) - bits - (is_unsigned ? 0 : 1);
if (unlikely(idigit >= (1L << remaining_bits)))
goto raise_overflow;
val |= (({{TYPE}}) idigit) << bits;

#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000
unpacking_done:
#endif
// handle sign and overflow into sign bit
if (!is_unsigned) {
// gcc warns about unsigned (val < 0) => test sign bit instead
if (unlikely(val & ((({{TYPE}}) 1) << (sizeof({{TYPE}}) * 8 - 1))))
goto raise_overflow;
// undo the PyNumber_Invert() above
if (is_negative)
val = ~val;
}
ret = 0;
done:
Py_XDECREF(shift);
Py_XDECREF(mask);
Py_XDECREF(stepval);
#endif
Py_DECREF(v);
if (likely(!ret))
return val;
}
#endif
return ({{TYPE}}) -1;
}
{{endif}}
} else {
{{TYPE}} val;
PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
Expand Down
2 changes: 1 addition & 1 deletion runtests.py
Expand Up @@ -2755,7 +2755,7 @@ def runtests(options, cmd_args, coverage=None):
if options.exclude:
exclude_selectors += [ string_selector(r) for r in options.exclude ]

if not COMPILER_HAS_INT128 or not IS_CPYTHON:
if not COMPILER_HAS_INT128:
exclude_selectors += [RegExSelector('int128')]

if options.shard_num > -1:
Expand Down
27 changes: 24 additions & 3 deletions tests/run/int128.pyx
@@ -1,3 +1,4 @@
# mode: run

cdef extern from *:
ctypedef long long int128_t "__int128_t"
Expand Down Expand Up @@ -60,7 +61,16 @@ def unsigned_conversion(x):
340282366920938463463374607431768211455
>>> bigint(unsigned_conversion(2**128)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... too big to convert
OverflowError: ... to convert...
>>> bigint(unsigned_conversion(2**128+1)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... to convert...
>>> bigint(unsigned_conversion(2**129-1)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... to convert...
>>> bigint(unsigned_conversion(2**129)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... to convert...
"""
cdef uint128_t n = x
return n
Expand Down Expand Up @@ -108,12 +118,23 @@ def signed_conversion(x):
170141183460469231731687303715884105727
>>> bigint(signed_conversion(2**127)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... too big to convert
OverflowError: ... to convert...
>>> bigint(signed_conversion(-2**127+1))
-170141183460469231731687303715884105727
>>> bigint(signed_conversion(-2**127))
-170141183460469231731687303715884105728
>>> bigint(signed_conversion(-2**127-1)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... too big to convert
OverflowError: ... to convert...
>>> bigint(signed_conversion(-2**127-2)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... to convert...
>>> bigint(signed_conversion(-2**128+1)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... to convert...
>>> bigint(signed_conversion(-2**128)) # doctest: +ELLIPSIS
Traceback (most recent call last):
OverflowError: ... to convert...
"""
cdef int128_t n = x
return n
Expand Down

0 comments on commit dec61cd

Please sign in to comment.