Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/src/arrow/python/builtin_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,8 @@ class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Conver
public:
// Append a non-missing item
// Converts `obj` to a 64-bit unsigned value and appends it to typed_builder_,
// returning the result of the builder's Append() call.
//
// NOTE(review): this text comes from a flattened diff hunk ("2 additions &
// 2 deletions"), so `val` is declared twice below. The first two statements
// look like the removed implementation and the last two like its
// replacement -- confirm against the real file; only one pair can be kept.
Status AppendItem(PyObject* obj) {
// Direct CPython conversion; note the result is cast to a *signed* int64_t
// even though the enclosing class is the UInt64 converter.
const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(obj));
RETURN_IF_PYERROR();
// Conversion delegated to internal::UInt64FromPythonInt, whose Status is
// propagated through RETURN_NOT_OK.
uint64_t val;
RETURN_NOT_OK(internal::UInt64FromPythonInt(obj, &val));
return typed_builder_->Append(val);
}
};
Expand Down
17 changes: 17 additions & 0 deletions cpp/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,23 @@ bool IsPyInteger(PyObject* obj) {
#endif
}

// Convert a Python integer-like object to a uint64_t.
//
// obj: the object to convert. Anything that does not pass PyLong_Check() is
//      first coerced with PyNumber_Long().
// out: receives the converted value when the conversion succeeds.
//
// Returns Status::OK() on success. When a Python error is pending, the
// function returns early through RETURN_IF_PYERROR() (macro defined
// elsewhere; presumably it yields a non-OK Status).
Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
  // Holds the object produced by PyNumber_Long() for the rest of the call.
  OwnedRef coerced;
  PyObject* py_long = obj;

  // PyLong_AsUnsignedLongLong() doesn't handle conversion from non-ints
  // (e.g. np.uint64), so do it ourselves
  if (!PyLong_Check(py_long)) {
    coerced.reset(PyNumber_Long(py_long));
    RETURN_IF_PYERROR();
    py_long = coerced.obj();
  }

  // All bits set is what PyLong_AsUnsignedLongLong() returns on failure, but
  // it is also a legitimate uint64 value, so the pending-error check is what
  // actually decides whether the conversion failed.
  const uint64_t error_sentinel = static_cast<uint64_t>(-1);
  const uint64_t converted =
      static_cast<uint64_t>(PyLong_AsUnsignedLongLong(py_long));
  if (converted == error_sentinel) {
    RETURN_IF_PYERROR();
  }

  *out = converted;
  return Status::OK();
}

} // namespace internal
} // namespace py
} // namespace arrow
2 changes: 2 additions & 0 deletions cpp/src/arrow/python/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arr
Decimal128* out);
bool IsPyInteger(PyObject* obj);

// Convert a Python integer-like object to an unsigned 64-bit integer.
//
// obj: the Python object to convert; objects that are not Python longs are
//      coerced with PyNumber_Long() before conversion.
// out: set to the converted value on success.
//
// Returns Status::OK() on success; returns early with an error Status when
// the Python conversion raises.
Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);

} // namespace internal
} // namespace py
} // namespace arrow
Expand Down
57 changes: 52 additions & 5 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@
import six


# (NumPy integer scalar type, Arrow integer type) pairs used to parametrize
# the integer conversion tests. Each NumPy type is paired with the Arrow type
# of the same width and signedness, so that np.iinfo(...).min/.max exercise
# the bounds of the converter for that exact Arrow type.
int_type_pairs = [
    (np.int8, pa.int8()),
    (np.int16, pa.int16()),
    (np.int32, pa.int32()),
    (np.int64, pa.int64()),
    (np.uint8, pa.uint8()),
    (np.uint16, pa.uint16()),
    (np.uint32, pa.uint32()),
    (np.uint64, pa.uint64())]


# Just the NumPy scalar types from the pairs above.
np_int_types, _ = zip(*int_type_pairs)


class StrangeIterable:
def __init__(self, lst):
self.lst = lst
Expand Down Expand Up @@ -146,7 +160,20 @@ def test_sequence_all_none():


# Checks that a sequence of Python ints and None values converts to an Arrow
# array of an explicitly requested integer type, including the minimum and
# maximum values representable by the paired NumPy type.
#
# NOTE(review): this text comes from a flattened diff, so the one-argument
# `def test_sequence_integer(seq):` line appears to be the removed signature
# and the decorator + two-argument `def` after it the replacement -- confirm
# against the real file.
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
def test_sequence_integer(seq):
@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
def test_sequence_integer(seq, np_scalar_pa_type):
# Each parameter is a (NumPy scalar type, Arrow type) pair.
np_scalar, pa_type = np_scalar_pa_type
expected = [1, None, 3, None,
np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
arr = pa.array(seq(expected), type=pa_type)
# Six items in total, two of which are None.
assert len(arr) == 6
assert arr.null_count == 2
assert arr.type == pa_type
assert arr.to_pylist() == expected


@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
def test_sequence_integer_inferred(seq):
expected = [1, None, 3, None]
arr = pa.array(seq(expected))
assert len(arr) == 4
Expand All @@ -156,12 +183,32 @@ def test_sequence_integer(seq):


# Checks that a sequence of NumPy integer scalars and None values converts to
# an Arrow array of an explicitly requested integer type, including the
# minimum and maximum values of the NumPy type.
#
# NOTE(review): this text comes from a flattened diff, so the "np_scalar"
# parametrization and the `def ...(seq, np_scalar):` line appear to be the
# removed version, and the "np_scalar_pa_type" decorator + `def` after them
# the replacement -- confirm against the real file.
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16,
np.uint32, np.uint64])
def test_sequence_numpy_integer(seq, np_scalar):
@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
def test_sequence_numpy_integer(seq, np_scalar_pa_type):
# Each parameter is a (NumPy scalar type, Arrow type) pair.
np_scalar, pa_type = np_scalar_pa_type
expected = [np_scalar(1), None, np_scalar(3), None,
np_scalar(np.iinfo(np_scalar).min),
np_scalar(np.iinfo(np_scalar).max)]
arr = pa.array(seq(expected), type=pa_type)
# Six items in total, two of which are None.
assert len(arr) == 6
assert arr.null_count == 2
assert arr.type == pa_type
assert arr.to_pylist() == expected


# Checks type inference for a sequence of NumPy integer scalars and None
# values: with no explicit type given, the resulting Arrow array is expected
# to be int64 regardless of the NumPy scalar type.
#
# NOTE(review): this text comes from a flattened diff, so two contradictory
# length assertions appear below. `expected` always holds six items, so the
# `== 4` assertion looks like the removed line and `== 6` its replacement --
# confirm against the real file.
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
# Only the NumPy type of the pair is used; the Arrow type is not passed to
# pa.array() in this test.
np_scalar, pa_type = np_scalar_pa_type
expected = [np_scalar(1), None, np_scalar(3), None]
if np_scalar != np.uint64:
expected += [np_scalar(np.iinfo(np_scalar).min),
np_scalar(np.iinfo(np_scalar).max)]
else:
# max(uint64) is too large for the inferred int64 type
expected += [0, np.iinfo(np.int64).max]
arr = pa.array(seq(expected))
assert len(arr) == 4
assert len(arr) == 6
assert arr.null_count == 2
assert arr.type == pa.int64()
assert arr.to_pylist() == expected
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/test_convert_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,15 @@ def test_empty_list_roundtrip(self):

tm.assert_frame_equal(result, df)

def test_array_from_nested_arrays(self):
    """Converting a column of nested arrays infers the list type.

    For every field of the schema returned by dataframe_with_arrays(), the
    column's values are converted twice: once as a plain list with the
    field's type given explicitly, and once directly with the type left to
    inference. Both conversions must agree in type and content.
    """
    frame, arrow_schema = dataframe_with_arrays()
    for column_field in arrow_schema:
        column_values = frame[column_field.name].values
        explicit = pa.array(list(column_values), type=column_field.type)
        inferred = pa.array(column_values)
        assert inferred.type == column_field.type  # == list<scalar>
        assert inferred.equals(explicit)


class TestConvertStructTypes(object):
"""
Expand Down