diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index 60f58ea5b3b..a286c6bd5e9 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -538,8 +538,8 @@ class UInt64Converter : public TypedConverterVisitor(PyLong_AsUnsignedLongLong(obj)); - RETURN_IF_PYERROR(); + uint64_t val; + RETURN_NOT_OK(internal::UInt64FromPythonInt(obj, &val)); return typed_builder_->Append(val); } }; diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index 494f929004a..df1db99911b 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -176,6 +176,23 @@ bool IsPyInteger(PyObject* obj) { #endif } +Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) { + OwnedRef ref; + // PyLong_AsUnsignedLongLong() doesn't handle conversion from non-ints + // (e.g. np.uint64), so do it ourselves + if (!PyLong_Check(obj)) { + ref.reset(PyNumber_Long(obj)); + RETURN_IF_PYERROR(); + obj = ref.obj(); + } + auto result = static_cast(PyLong_AsUnsignedLongLong(obj)); + if (result == static_cast(-1)) { + RETURN_IF_PYERROR(); + } + *out = static_cast(result); + return Status::OK(); +} + } // namespace internal } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h index c82bdabc476..c0171aa2f5a 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/cpp/src/arrow/python/helpers.h @@ -57,6 +57,8 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arr Decimal128* out); bool IsPyInteger(PyObject* obj); +Status UInt64FromPythonInt(PyObject* obj, uint64_t* out); + } // namespace internal } // namespace py } // namespace arrow diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 5cd4a52a218..8423ff00b67 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -28,6 +28,20 @@ import six +int_type_pairs = [ + (np.int8, pa.int8()), + (np.int16, pa.int64()), + (np.int32, pa.int32()), + (np.int64, pa.int64()), + (np.uint8, pa.uint8()), + (np.uint16, pa.uint64()), + (np.uint32, pa.uint32()), + (np.uint64, pa.uint64())] + + +np_int_types, _ = zip(*int_type_pairs) + + class StrangeIterable: def __init__(self, lst): self.lst = lst @@ -146,7 +160,20 @@ def test_sequence_all_none(): @pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values]) -def test_sequence_integer(seq): +@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs) +def test_sequence_integer(seq, np_scalar_pa_type): + np_scalar, pa_type = np_scalar_pa_type + expected = [1, None, 3, None, + np.iinfo(np_scalar).min, np.iinfo(np_scalar).max] + arr = pa.array(seq(expected), type=pa_type) + assert len(arr) == 6 + assert arr.null_count == 2 + assert arr.type == pa_type + assert arr.to_pylist() == expected + + +@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values]) +def test_sequence_integer_inferred(seq): expected = [1, None, 3, None] arr = pa.array(seq(expected)) assert len(arr) == 4 @@ -156,12 +183,32 @@ def test_sequence_integer(seq): @pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values]) -@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16, - np.uint32, np.uint64]) -def test_sequence_numpy_integer(seq, np_scalar): +@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs) +def test_sequence_numpy_integer(seq, np_scalar_pa_type): + np_scalar, pa_type = np_scalar_pa_type + expected = [np_scalar(1), None, np_scalar(3), None, + np_scalar(np.iinfo(np_scalar).min), + np_scalar(np.iinfo(np_scalar).max)] + arr = pa.array(seq(expected), type=pa_type) + assert len(arr) == 6 + assert arr.null_count == 2 + assert arr.type == pa_type + assert arr.to_pylist() == expected + + +@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values]) +@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs) +def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type): + np_scalar, pa_type = np_scalar_pa_type expected = [np_scalar(1), None, np_scalar(3), None] + if np_scalar != np.uint64: + expected += [np_scalar(np.iinfo(np_scalar).min), + np_scalar(np.iinfo(np_scalar).max)] + else: + # max(uint64) is too large for the inferred int64 type + expected += [0, np.iinfo(np.int64).max] arr = pa.array(seq(expected)) - assert len(arr) == 4 + assert len(arr) == 6 assert arr.null_count == 2 assert arr.type == pa.int64() assert arr.to_pylist() == expected diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 5b6f6bcdfe5..95137ffb26e 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -1328,6 +1328,15 @@ def test_empty_list_roundtrip(self): tm.assert_frame_equal(result, df) + def test_array_from_nested_arrays(self): + df, schema = dataframe_with_arrays() + for field in schema: + arr = df[field.name].values + expected = pa.array(list(arr), type=field.type) + result = pa.array(arr) + assert result.type == field.type # == list + assert result.equals(expected) + class TestConvertStructTypes(object): """