Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-2390: [C++/Python] Map Python exceptions to Arrow status codes #1916

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 17 additions & 0 deletions cpp/src/arrow/python/common.cc
Expand Up @@ -98,6 +98,23 @@ Status CheckPyError(StatusCode code) {

std::string message;
RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));

if (code == StatusCode::UnknownError) {
// Try to match the Python exception type with an appropriate Status code
if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
code = StatusCode::OutOfMemory;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
code = StatusCode::KeyError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
code = StatusCode::TypeError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
code = StatusCode::Invalid;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
code = StatusCode::IOError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) {
code = StatusCode::NotImplemented;
}
}
return Status(code, message);
}
return Status::OK();
Expand Down
48 changes: 47 additions & 1 deletion cpp/src/arrow/python/python-test.cc
Expand Up @@ -77,6 +77,52 @@ TEST(OwnedRefNoGIL, TestMoves) {
ASSERT_EQ(Py_REFCNT(v), 1);
}

// Checks how CheckPyError() translates a pending Python exception into an
// Arrow Status: the message must be carried over, the Python error indicator
// must no longer be set afterwards, and the status code must follow the
// exception type unless the caller passes an explicit code.
TEST(CheckPyError, TestStatus) {
  PyAcquireGIL lock;  // presumably holds the GIL for the test's duration -- confirm
  Status st;

  // Converts the currently pending Python error into `st`, then verifies the
  // resulting message and that no Python error remains set.
  //
  // NOTE: fatal ASSERT_* macros only return from the enclosing function, which
  // here is the lambda. Every call site therefore wraps the call in
  // ASSERT_NO_FATAL_FAILURE so that a failure in the helper also stops the
  // test instead of continuing with a stale `st`.
  auto check_error = [](Status& st, const char* expected_message = "some error") {
    st = CheckPyError();
    ASSERT_EQ(st.message(), expected_message);
    ASSERT_FALSE(PyErr_Occurred());
  };

  // Exception types expected to fall back to UnknownError.
  for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) {
    PyErr_SetString(exc_type, "some error");
    ASSERT_NO_FATAL_FAILURE(check_error(st));
    ASSERT_TRUE(st.IsUnknownError());
  }

  PyErr_SetString(PyExc_TypeError, "some error");
  ASSERT_NO_FATAL_FAILURE(check_error(st));
  ASSERT_TRUE(st.IsTypeError());

  PyErr_SetString(PyExc_ValueError, "some error");
  ASSERT_NO_FATAL_FAILURE(check_error(st));
  ASSERT_TRUE(st.IsInvalid());

  // The message expected for a KeyError carries an extra pair of quotes.
  PyErr_SetString(PyExc_KeyError, "some error");
  ASSERT_NO_FATAL_FAILURE(check_error(st, "'some error'"));
  ASSERT_TRUE(st.IsKeyError());

  // Both OSError and IOError are expected to yield an IOError status.
  for (PyObject* exc_type : {PyExc_OSError, PyExc_IOError}) {
    PyErr_SetString(exc_type, "some error");
    ASSERT_NO_FATAL_FAILURE(check_error(st));
    ASSERT_TRUE(st.IsIOError());
  }

  PyErr_SetString(PyExc_NotImplementedError, "some error");
  ASSERT_NO_FATAL_FAILURE(check_error(st));
  ASSERT_TRUE(st.IsNotImplemented());

  // No override if a specific status code is given
  PyErr_SetString(PyExc_TypeError, "some error");
  st = CheckPyError(StatusCode::SerializationError);
  ASSERT_TRUE(st.IsSerializationError());
  ASSERT_EQ(st.message(), "some error");
  ASSERT_FALSE(PyErr_Occurred());
}

class DecimalTest : public ::testing::Test {
public:
DecimalTest() : lock_(), decimal_constructor_() {
Expand Down Expand Up @@ -221,7 +267,7 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
ASSERT_EQ(PyList_SetItem(list, 1, integer), 0);
ASSERT_EQ(PyList_SetItem(list, 2, doub), 0);

ASSERT_RAISES(UnknownError, ConvertPySequence(list, pool, &arr));
ASSERT_RAISES(TypeError, ConvertPySequence(list, pool, &arr));
}

TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/tests/test_convert_builtin.py
Expand Up @@ -316,7 +316,7 @@ def test_sequence_utf8_to_unicode():

# test a non-utf8 unicode string
val = (u'mañana').encode('utf-16-le')
with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
pa.array([val], type=pa.string())


Expand Down Expand Up @@ -496,7 +496,7 @@ def test_sequence_mixed_nesting_levels():

def test_sequence_mixed_types_fails():
data = ['a', 1, 2.0]
with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowTypeError):
pa.array(data)


Expand Down
40 changes: 18 additions & 22 deletions python/pyarrow/tests/test_convert_pandas.py
Expand Up @@ -291,10 +291,10 @@ def test_dictionary_indices_boundscheck(self):
batch = pa.RecordBatch.from_arrays([arr], ['foo'])
table = pa.Table.from_batches([batch, batch, batch])

with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
arr.to_pandas()

with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
table.to_pandas()

def test_unicode_with_unicode_column_and_index(self):
Expand Down Expand Up @@ -1238,7 +1238,7 @@ def test_array_of_bytes_to_strings(self):
# cannot be converted to utf-8
def test_array_of_bytes_to_strings_bad_data(self):
with pytest.raises(
pa.lib.ArrowException,
pa.lib.ArrowInvalid,
message="Unknown error: 'utf-8' codec can't decode byte 0x80 "
"in position 0: invalid start byte"):
pa.array(np.array([b'\x80\x81'], dtype=object), pa.string())
Expand Down Expand Up @@ -1321,12 +1321,12 @@ def test_decimal_128_to_pandas(self):
def test_decimal_fails_with_truncation(self):
data1 = [decimal.Decimal('1.234')]
type1 = pa.decimal128(10, 2)
with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
pa.array(data1, type=type1)

data2 = [decimal.Decimal('1.2345')]
type2 = pa.decimal128(10, 3)
with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
pa.array(data2, type=type2)

def test_decimal_with_different_precisions(self):
Expand Down Expand Up @@ -1737,33 +1737,29 @@ def test_zero_copy_dictionaries(self):
tm.assert_series_equal(pd.Series(result), pd.Series(values),
check_names=False)

def check_zero_copy_failure(self, arr):
with pytest.raises(pa.ArrowInvalid):
arr.to_pandas(zero_copy_only=True)

def test_zero_copy_failure_on_object_types(self):
with pytest.raises(pa.ArrowException):
pa.array(['A', 'B', 'C']).to_pandas(zero_copy_only=True)
self.check_zero_copy_failure(pa.array(['A', 'B', 'C']))

def test_zero_copy_failure_with_int_when_nulls(self):
with pytest.raises(pa.ArrowException):
pa.array([0, 1, None]).to_pandas(zero_copy_only=True)
self.check_zero_copy_failure(pa.array([0, 1, None]))

def test_zero_copy_failure_with_float_when_nulls(self):
with pytest.raises(pa.ArrowException):
pa.array([0.0, 1.0, None]).to_pandas(zero_copy_only=True)
self.check_zero_copy_failure(pa.array([0.0, 1.0, None]))

def test_zero_copy_failure_on_bool_types(self):
with pytest.raises(pa.ArrowException):
pa.array([True, False]).to_pandas(zero_copy_only=True)
self.check_zero_copy_failure(pa.array([True, False]))

def test_zero_copy_failure_on_list_types(self):
arr = np.array([[1, 2], [8, 9]], dtype=object)

with pytest.raises(pa.ArrowException):
pa.array(arr).to_pandas(zero_copy_only=True)
arr = pa.array([[1, 2], [8, 9]], type=pa.list_(pa.int64()))
self.check_zero_copy_failure(arr)

def test_zero_copy_failure_on_timestamp_types(self):
arr = np.array(['2007-07-13'], dtype='datetime64[ns]')

with pytest.raises(pa.ArrowException):
pa.array(arr).to_pandas(zero_copy_only=True)
self.check_zero_copy_failure(pa.array(arr))


class TestConvertMisc(object):
Expand Down Expand Up @@ -1843,11 +1839,11 @@ def test_empty_category(self):

def test_mixed_types_fails(self):
data = pd.DataFrame({'a': ['a', 1, 2.0]})
with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowTypeError):
pa.Table.from_pandas(data)

data = pd.DataFrame({'a': [1, True]})
with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowTypeError):
pa.Table.from_pandas(data)

def test_strided_data_import(self):
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/tests/test_serialization.py
Expand Up @@ -592,7 +592,7 @@ def test_serialize_to_components_invalid_cases():
'data': [buf]
}

with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
pa.deserialize_components(components)

components = {
Expand All @@ -601,7 +601,7 @@ def test_serialize_to_components_invalid_cases():
'data': [buf, buf]
}

with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
pa.deserialize_components(components)


Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_table.py
Expand Up @@ -234,7 +234,7 @@ def test_recordbatchlist_schema_equals():
batch1 = pa.RecordBatch.from_pandas(data1)
batch2 = pa.RecordBatch.from_pandas(data2)

with pytest.raises(pa.ArrowException):
with pytest.raises(pa.ArrowInvalid):
pa.Table.from_batches([batch1, batch2])


Expand Down