Custom int128 conversion as a slow fallback (cythonGH-5419)

* Use a custom (although slow) PyLong->cint128 conversion if "_PyLong_AsByteArray()" is missing (in PyPy/Limited API).
* Avoid large integer conversion for enum types (where shift etc. don't work well).
da-woods · May 15, 2023 · dec61cd · dec61cd
1 parent 580ceee
commit dec61cd
Show file tree

Hide file tree

Showing 4 changed files with 142 additions and 18 deletions.
diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py
@@ -532,8 +532,11 @@ def create_from_py_utility_code(self, env):
                     self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name()
                     env.use_utility_code(TempitaUtilityCode.load_cached(
                         "CIntFromPy", "TypeConversion.c",
-                        context={"TYPE": self.empty_declaration_code(),
-                                 "FROM_PY_FUNCTION": self.from_py_function}))
+                        context={
+                            "TYPE": self.empty_declaration_code(),
+                            "FROM_PY_FUNCTION": self.from_py_function,
+                            "IS_ENUM": base_type.is_enum,
+                        }))
                     return True
                 elif base_type.is_float:
                     pass  # XXX implement!
@@ -2043,8 +2046,11 @@ def create_from_py_utility_code(self, env):
             self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name()
             env.use_utility_code(TempitaUtilityCode.load_cached(
                 "CIntFromPy", "TypeConversion.c",
-                context={"TYPE": self.empty_declaration_code(),
-                         "FROM_PY_FUNCTION": self.from_py_function}))
+                context={
+                    "TYPE": self.empty_declaration_code(),
+                    "FROM_PY_FUNCTION": self.from_py_function,
+                    "IS_ENUM": self.is_enum,
+                }))
         return True
 
     @staticmethod

diff --git a/Cython/Utility/TypeConversion.c b/Cython/Utility/TypeConversion.c
@@ -1097,33 +1097,130 @@ static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
 #endif
             }
         }
+
+        {{if IS_ENUM}}
+        PyErr_SetString(PyExc_RuntimeError,
+                        "_PyLong_AsByteArray() not available, cannot convert large enums");
+        return ({{TYPE}}) -1;
+        {{else}}
+        // large integer type and no access to PyLong internals => allow for a more expensive conversion
         {
-#if (CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) && !defined(_PyLong_AsByteArray)
-            PyErr_SetString(PyExc_RuntimeError,
-                            "_PyLong_AsByteArray() not available, cannot convert large numbers");
-#else
             {{TYPE}} val;
             PyObject *v = __Pyx_PyNumber_IntOrLong(x);
- #if PY_MAJOR_VERSION < 3
+#if PY_MAJOR_VERSION < 3
             if (likely(v) && !PyLong_Check(v)) {
                 PyObject *tmp = v;
                 v = PyNumber_Long(tmp);
                 Py_DECREF(tmp);
             }
- #endif
+#endif
             if (likely(v)) {
+                int ret = -1;
+#if !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray)
                 int one = 1; int is_little = (int)*(unsigned char *)&one;
                 unsigned char *bytes = (unsigned char *)&val;
-                int ret = _PyLong_AsByteArray((PyLongObject *)v,
-                                              bytes, sizeof(val),
-                                              is_little, !is_unsigned);
+                ret = _PyLong_AsByteArray((PyLongObject *)v,
+                                           bytes, sizeof(val),
+                                           is_little, !is_unsigned);
+#else
+// Inefficient copy of bit chunks through the C-API.  Probably still better than a "cannot do this" exception.
+                PyObject *stepval = NULL, *mask = NULL, *shift = NULL;
+                int bits, remaining_bits, is_negative = 0;
+                long idigit;
+                int chunk_size = (sizeof(long) < 8) ? 30 : 62;
+
+                // use exact PyLong to prevent user defined &/>>/~/etc. implementations
+                if (unlikely(!PyLong_CheckExact(v))) {
+                    PyObject *tmp = v;
+                    v = PyNumber_Long(v);
+                    assert(PyLong_CheckExact(v));
+                    Py_DECREF(tmp);
+                    if (unlikely(!v)) return ({{TYPE}}) -1;
+                }
+
+#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000
+                if (Py_SIZE(x) == 0)
+                    return ({{TYPE}}) 0;
+                is_negative = Py_SIZE(x) < 0;
+#else
+                {
+                    // misuse Py_False as a quick way to compare to a '0' int object
+                    int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
+                    if (unlikely(result < 0))
+                        return ({{TYPE}}) -1;
+                    is_negative = result == 1;
+                }
+#endif
+
+                if (is_unsigned && unlikely(is_negative)) {
+                    goto raise_neg_overflow;
+                } else if (is_negative) {
+                    // bit-invert to make sure we can safely convert it
+                    stepval = PyNumber_Invert(v);
+                    if (unlikely(!stepval))
+                        return ({{TYPE}}) -1;
+                } else {
+                    stepval = __Pyx_NewRef(v);
+                }
+
+                // unpack full chunks of bits
+                val = ({{TYPE}}) 0;
+                mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done;
+                shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done;
+                for (bits = 0; bits < (int) sizeof({{TYPE}}) * 8 - chunk_size; bits += chunk_size) {
+                    PyObject *tmp, *digit;
+
+                    digit = PyNumber_And(stepval, mask);
+                    if (unlikely(!digit)) goto done;
+                    idigit = PyLong_AsLong(digit);
+                    Py_DECREF(digit);
+                    if (unlikely(idigit < 0)) goto done;
+
+                    tmp = PyNumber_Rshift(stepval, shift);
+                    if (unlikely(!tmp)) goto done;
+                    Py_DECREF(stepval); stepval = tmp;
+
+                    val |= (({{TYPE}}) idigit) << bits;
+
+                    #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000
+                    if (Py_SIZE(stepval) == 0)
+                        goto unpacking_done;
+                    #endif
+                }
+
+                // detect overflow when adding the last bits
+                idigit = PyLong_AsLong(stepval);
+                if (unlikely(idigit < 0)) goto done;
+                remaining_bits = ((int) sizeof({{TYPE}}) * 8) - bits - (is_unsigned ? 0 : 1);
+                if (unlikely(idigit >= (1L << remaining_bits)))
+                    goto raise_overflow;
+                val |= (({{TYPE}}) idigit) << bits;
+
+            #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000
+            unpacking_done:
+            #endif
+                // handle sign and overflow into sign bit
+                if (!is_unsigned) {
+                    // gcc warns about unsigned (val < 0) => test sign bit instead
+                    if (unlikely(val & ((({{TYPE}}) 1) << (sizeof({{TYPE}}) * 8 - 1))))
+                        goto raise_overflow;
+                    // undo the PyNumber_Invert() above
+                    if (is_negative)
+                        val = ~val;
+                }
+                ret = 0;
+            done:
+                Py_XDECREF(shift);
+                Py_XDECREF(mask);
+                Py_XDECREF(stepval);
+#endif
                 Py_DECREF(v);
                 if (likely(!ret))
                     return val;
             }
-#endif
             return ({{TYPE}}) -1;
         }
+        {{endif}}
     } else {
         {{TYPE}} val;
         PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);

diff --git a/runtests.py b/runtests.py
@@ -2755,7 +2755,7 @@ def runtests(options, cmd_args, coverage=None):
     if options.exclude:
         exclude_selectors += [ string_selector(r) for r in options.exclude ]
 
-    if not COMPILER_HAS_INT128 or not IS_CPYTHON:
+    if not COMPILER_HAS_INT128:
         exclude_selectors += [RegExSelector('int128')]
 
     if options.shard_num > -1:

diff --git a/tests/run/int128.pyx b/tests/run/int128.pyx
@@ -1,3 +1,4 @@
+# mode: run
 
 cdef extern from *:
     ctypedef long long int128_t "__int128_t"
@@ -60,7 +61,16 @@ def unsigned_conversion(x):
     340282366920938463463374607431768211455
     >>> bigint(unsigned_conversion(2**128))  # doctest: +ELLIPSIS
     Traceback (most recent call last):
-    OverflowError: ... too big to convert
+    OverflowError: ... to convert...
+    >>> bigint(unsigned_conversion(2**128+1))  # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    OverflowError: ... to convert...
+    >>> bigint(unsigned_conversion(2**129-1))  # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    OverflowError: ... to convert...
+    >>> bigint(unsigned_conversion(2**129))  # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    OverflowError: ... to convert...
     """
     cdef uint128_t n = x
     return n
@@ -108,12 +118,23 @@ def signed_conversion(x):
     170141183460469231731687303715884105727
     >>> bigint(signed_conversion(2**127))  # doctest: +ELLIPSIS
     Traceback (most recent call last):
-    OverflowError: ... too big to convert
+    OverflowError: ... to convert...
+    >>> bigint(signed_conversion(-2**127+1))
+    -170141183460469231731687303715884105727
     >>> bigint(signed_conversion(-2**127))
     -170141183460469231731687303715884105728
     >>> bigint(signed_conversion(-2**127-1))  # doctest: +ELLIPSIS
     Traceback (most recent call last):
-    OverflowError: ... too big to convert
+    OverflowError: ... to convert...
+    >>> bigint(signed_conversion(-2**127-2))  # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    OverflowError: ... to convert...
+    >>> bigint(signed_conversion(-2**128+1))  # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    OverflowError: ... to convert...
+    >>> bigint(signed_conversion(-2**128))  # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    OverflowError: ... to convert...
     """
     cdef int128_t n = x
     return n