Closed
Description
Describe the bug
After upgrading from Cython 0.29.36 to Cython 3.0.0, the generated code hangs indefinitely when OpenMP is used and a sub-function is called inside the parallel loop.
Looking at the code generated for the lines below:
for j in prange(J):
foo_vec(x[:, j], y[:, j])
With Cython 0.29.36, the generated code is:
// for j in prange(J):
__pyx_t_1 = __pyx_v_J;
if ((1 == 0)) abort();
{
#if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
#undef likely
#undef unlikely
#define likely(x) (x)
#define unlikely(x) (x)
#endif
__pyx_t_3 = (__pyx_t_1 - 0 + 1 - 1/abs(1)) / 1;
if (__pyx_t_3 > 0)
{
#ifdef _OPENMP
#pragma omp parallel
#endif /* _OPENMP */
{
#ifdef _OPENMP
#pragma omp for firstprivate(__pyx_v_j) lastprivate(__pyx_v_j)
#endif /* _OPENMP */
for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2++){
{
__pyx_v_j = (Py_ssize_t)(0 + 1 * __pyx_t_2);
// foo_vec(x[:, j], y[:, j])
__pyx_t_4.data = __pyx_v_x.data;
__pyx_t_4.memview = __pyx_v_x.memview;
__PYX_INC_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.shape[0] = __pyx_v_x.shape[0];
__pyx_t_4.strides[0] = __pyx_v_x.strides[0];
__pyx_t_4.suboffsets[0] = -1;
{
Py_ssize_t __pyx_tmp_idx = __pyx_v_j;
Py_ssize_t __pyx_tmp_stride = __pyx_v_x.strides[1];
__pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
}
__pyx_t_5.data = __pyx_v_y.data;
__pyx_t_5.memview = __pyx_v_y.memview;
__PYX_INC_MEMVIEW(&__pyx_t_5, 0);
__pyx_t_5.shape[0] = __pyx_v_y.shape[0];
__pyx_t_5.strides[0] = __pyx_v_y.strides[0];
__pyx_t_5.suboffsets[0] = -1;
{
Py_ssize_t __pyx_tmp_idx = __pyx_v_j;
Py_ssize_t __pyx_tmp_stride = __pyx_v_y.strides[1];
__pyx_t_5.data += __pyx_tmp_idx * __pyx_tmp_stride;
}
__pyx_f_6my_ext_foo_vec(__pyx_t_4, __pyx_t_5);
__PYX_XDEC_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.memview = NULL;
__pyx_t_4.data = NULL;
__PYX_XDEC_MEMVIEW(&__pyx_t_5, 0);
__pyx_t_5.memview = NULL;
__pyx_t_5.data = NULL;
}
}
}
}
}
#if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
#undef likely
#undef unlikely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
With Cython 3.0, the generated code is:
// for j in prange(J):
__pyx_t_1 = __pyx_v_J;
{
#if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
#undef likely
#undef unlikely
#define likely(x) (x)
#define unlikely(x) (x)
#endif
__pyx_t_3 = (__pyx_t_1 - 0 + 1 - 1/abs(1)) / 1;
if (__pyx_t_3 > 0)
{
#ifdef _OPENMP
#pragma omp parallel
#endif /* _OPENMP */
{
#ifdef _OPENMP
#pragma omp for firstprivate(__pyx_v_j) lastprivate(__pyx_v_j)
#endif /* _OPENMP */
for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2++){
{
__pyx_v_j = (Py_ssize_t)(0 + 1 * __pyx_t_2);
// foo_vec(x[:, j], y[:, j])
__pyx_t_4.data = __pyx_v_x.data;
__pyx_t_4.memview = __pyx_v_x.memview;
__PYX_INC_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.shape[0] = __pyx_v_x.shape[0];
__pyx_t_4.strides[0] = __pyx_v_x.strides[0];
__pyx_t_4.suboffsets[0] = -1;
{
Py_ssize_t __pyx_tmp_idx = __pyx_v_j;
Py_ssize_t __pyx_tmp_stride = __pyx_v_x.strides[1];
__pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
}
__pyx_t_5.data = __pyx_v_y.data;
__pyx_t_5.memview = __pyx_v_y.memview;
__PYX_INC_MEMVIEW(&__pyx_t_5, 0);
__pyx_t_5.shape[0] = __pyx_v_y.shape[0];
__pyx_t_5.strides[0] = __pyx_v_y.strides[0];
__pyx_t_5.suboffsets[0] = -1;
{
Py_ssize_t __pyx_tmp_idx = __pyx_v_j;
Py_ssize_t __pyx_tmp_stride = __pyx_v_y.strides[1];
__pyx_t_5.data += __pyx_tmp_idx * __pyx_tmp_stride;
}
__pyx_f_6my_ext_foo_vec(__pyx_t_4, __pyx_t_5); if (unlikely(__Pyx_ErrOccurredWithGIL())) __PYX_ERR(0, 40, __pyx_L5_error)
__PYX_XCLEAR_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.memview = NULL; __pyx_t_4.data = NULL;
__PYX_XCLEAR_MEMVIEW(&__pyx_t_5, 0);
__pyx_t_5.memview = NULL; __pyx_t_5.data = NULL;
goto __pyx_L8;
__pyx_L5_error:;
{
#ifdef WITH_THREAD
PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
#endif
#ifdef _OPENMP
#pragma omp flush(__pyx_parallel_exc_type)
#endif /* _OPENMP */
if (!__pyx_parallel_exc_type) {
__Pyx_ErrFetchWithState(&__pyx_parallel_exc_type, &__pyx_parallel_exc_value, &__pyx_parallel_exc_tb);
__pyx_parallel_filename = __pyx_filename; __pyx_parallel_lineno = __pyx_lineno; __pyx_parallel_clineno = __pyx_clineno;
__Pyx_GOTREF(__pyx_parallel_exc_type);
}
#ifdef WITH_THREAD
__Pyx_PyGILState_Release(__pyx_gilstate_save);
#endif
}
__pyx_parallel_why = 4;
goto __pyx_L7;
__pyx_L7:;
#ifdef _OPENMP
#pragma omp critical(__pyx_parallel_lastprivates0)
#endif /* _OPENMP */
{
__pyx_parallel_temp0 = __pyx_v_j;
}
__pyx_L8:;
#ifdef _OPENMP
#pragma omp flush(__pyx_parallel_why)
#endif /* _OPENMP */
}
}
#ifdef _OPENMP
Py_END_ALLOW_THREADS
#else
{
#ifdef WITH_THREAD
PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
#endif
#endif /* _OPENMP */
/* Clean up any temporaries */
__PYX_XCLEAR_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.memview = NULL; __pyx_t_4.data = NULL;
__PYX_XCLEAR_MEMVIEW(&__pyx_t_5, 0);
__pyx_t_5.memview = NULL; __pyx_t_5.data = NULL;
#ifdef WITH_THREAD
__Pyx_PyGILState_Release(__pyx_gilstate_save);
#endif
#ifndef _OPENMP
}
#endif /* _OPENMP */
}
}
if (__pyx_parallel_exc_type) {
/* This may have been overridden by a continue, break or return in another thread. Prefer the error. */
__pyx_parallel_why = 4;
}
if (__pyx_parallel_why) {
__pyx_v_j = __pyx_parallel_temp0;
switch (__pyx_parallel_why) {
case 4:
{
#ifdef WITH_THREAD
PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
#endif
__Pyx_GIVEREF(__pyx_parallel_exc_type);
__Pyx_ErrRestoreWithState(__pyx_parallel_exc_type, __pyx_parallel_exc_value, __pyx_parallel_exc_tb);
__pyx_filename = __pyx_parallel_filename; __pyx_lineno = __pyx_parallel_lineno; __pyx_clineno = __pyx_parallel_clineno;
#ifdef WITH_THREAD
__Pyx_PyGILState_Release(__pyx_gilstate_save);
#endif
}
goto __pyx_L1_error;
}
}
}
#if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
#undef likely
#undef unlikely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
I am not expecting any GIL check (or any other check) in the prange body.
Code to reproduce the behaviour:
my_ext.pyx :
cimport cython
from cython.parallel cimport prange
cimport numpy as np
np.import_array()
# Python-visible entry point of the reproducer.
# Allocates a float64 array `y` with the same number of dimensions and the same
# dimensions as `x`, then dispatches on ndim: 1 -> foo_vec, 2 -> foo_mat.
# For any other ndim neither helper runs and `y` is returned as allocated.
# NOTE(review): the helpers are declared `nogil` but are called here without a
# `with nogil:` block, so this thread presumably still holds the GIL while the
# prange workers in foo_mat run. That would matter if a worker tries to
# acquire the GIL (see the __Pyx_ErrOccurredWithGIL() call in the Cython 3
# output quoted in this report) -- confirm.
def foo(np.ndarray x not None):
cdef int ndim = np.PyArray_NDIM(x)
# Output buffer created with PyArray_EMPTY; the helpers below write into it.
y = np.PyArray_EMPTY(ndim, np.PyArray_DIMS(x), np.NPY_FLOAT64, 0)
if ndim == 1:
foo_vec(x, y)
elif ndim == 2:
foo_mat(x, y)
return y
# 1-D kernel: writes y[i] = x[i] * 2.0 + 1.0 for every i in range(x.shape[0]).
# The decorators switch off bounds checking and negative-index wraparound.
# NOTE(review): assumes y has at least x.shape[0] elements -- nothing here
# verifies it.
# NOTE(review): the signature is `void ... nogil` with no `noexcept`. Under
# Cython 3 a cdef function presumably propagates exceptions by default, which
# for a void return forces the caller to check the error indicator after each
# call (the `__Pyx_ErrOccurredWithGIL()` check in the Cython 3 output quoted in
# this report). Declaring it `noexcept nogil` is the likely way to drop that
# check -- confirm against the Cython 3 migration guide.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void foo_vec(const double[:] x, double[:] y) nogil:
cdef:
Py_ssize_t I = x.shape[0]
Py_ssize_t i
for i in range(I):
y[i] = x[i] * 2.0 + 1.0 # Do whatever computation on vec
# 2-D driver: calls foo_vec on each pair of column slices x[:, j], y[:, j]
# for j in prange(x.shape[1]).
# The decorators switch off bounds checking and negative-index wraparound.
# This prange loop is the one whose generated C is quoted in this report
# (`J` and `j` appear there as __pyx_v_J and __pyx_v_j).
# NOTE(review): like foo_vec, this is `void ... nogil` with no `noexcept`;
# under Cython 3 that presumably adds exception bookkeeping around the call
# inside the parallel loop -- confirm against the Cython 3 migration guide.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void foo_mat(const double[:, :] x, double[:, :] y) nogil:
cdef:
Py_ssize_t J = x.shape[1]
Py_ssize_t j
# The function is declared nogil, so prange is used without a `nogil=True` argument.
for j in prange(J):
foo_vec(x[:, j], y[:, j])
setup.py :
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy as np
# Single extension module built from my_ext.pyx. "-fopenmp" is passed to both
# the compile and the link step, and the NumPy headers are put on the include
# path via np.get_include().
my_ext_extension = Extension(
    "my_ext",
    ["my_ext.pyx"],
    include_dirs=[np.get_include()],
    extra_compile_args=["-fopenmp"],
    extra_link_args=["-fopenmp"],
    define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
)

# Cythonize with language_level=3 and annotate=True, then hand the result to
# setuptools.
setup(
    ext_modules=cythonize(
        [my_ext_extension],
        language_level=3,
        annotate=True,
    ),
)
Expected behaviour
I am expecting generated code that:
- does not require any GIL check
- works correctly on latest Cython version😀
OS
Linux
Python version
3.10.6
Cython version
3.0.0
Additional context
No response
Metadata
Metadata
Assignees
Labels
No labels