Skip to content

Commit

Permalink
ARROW-8070: [C++] Cast segfaults on unsupported cast from list<binary> to utf8
Browse files Browse the repository at this point in the history

Return ArrowInvalid for unsupported `list<...> => non-list` casts instead of segfaulting.

A `list(binary()) => utf8()` conversion could make sense, but as a `concat` operation rather than as a cast.

Closes #6738 from kszucs/ARROW-8070

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
kszucs authored and wesm committed Mar 27, 2020
1 parent 26bd82f commit 8ee4be7
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
17 changes: 8 additions & 9 deletions cpp/src/arrow/compute/kernels/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1429,8 +1429,8 @@ Status GetListCastFunc(const DataType& in_type, std::shared_ptr<DataType> out_ty
const CastOptions& options,
std::unique_ptr<CastKernelBase>* kernel) {
if (out_type->id() != TypeClass::type_id) {
// Kernel will be null
return Status::OK();
return Status::Invalid("Cannot cast from ", in_type.ToString(), " to ",
out_type->ToString());
}
const DataType& in_value_type = *checked_cast<const TypeClass&>(in_type).value_type();
std::shared_ptr<DataType> out_value_type =
Expand Down Expand Up @@ -1503,19 +1503,18 @@ Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_ty
CAST_FUNCTION_CASE(LargeStringType);
CAST_FUNCTION_CASE(DictionaryType);
case Type::NA:
cast_kernel.reset(new FromNullCastKernel(std::move(out_type)));
cast_kernel.reset(new FromNullCastKernel(out_type));
break;
case Type::LIST:
RETURN_NOT_OK(
GetListCastFunc<ListType>(in_type, std::move(out_type), options, &cast_kernel));
RETURN_NOT_OK(GetListCastFunc<ListType>(in_type, out_type, options, &cast_kernel));
break;
case Type::LARGE_LIST:
RETURN_NOT_OK(GetListCastFunc<LargeListType>(in_type, std::move(out_type), options,
&cast_kernel));
RETURN_NOT_OK(
GetListCastFunc<LargeListType>(in_type, out_type, options, &cast_kernel));
break;
case Type::EXTENSION:
RETURN_NOT_OK(ExtensionCastKernel::Make(std::move(in_type), std::move(out_type),
options, &cast_kernel));
RETURN_NOT_OK(
ExtensionCastKernel::Make(std::move(in_type), out_type, options, &cast_kernel));
break;
default:
break;
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/compute/kernels/cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,17 @@ TEST_F(TestCast, BooleanToString) { TestCastBooleanToString<StringType>(); }

// Runs the shared boolean-to-string cast test with LargeStringType as the
// string type parameter.
TEST_F(TestCast, BooleanToLargeString) {
  TestCastBooleanToString<LargeStringType>();
}

// ARROW-8070: casting a list array to a non-list type is not supported.
// Both casts below are expected to fail with an Invalid status.
TEST_F(TestCast, ListToPrimitive) {
  CastOptions cast_options;
  std::shared_ptr<Array> out;

  // list<int8> -> uint8
  const auto int8_lists = ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]");
  ASSERT_RAISES(Invalid, Cast(&ctx_, *int8_lists, uint8(), cast_options, &out));

  // list<binary> -> utf8
  const auto binary_lists =
      ArrayFromJSON(list(binary()), "[[\"1\", \"2\"], [\"3\", \"4\"]]");
  ASSERT_RAISES(Invalid, Cast(&ctx_, *binary_lists, utf8(), cast_options, &out));
}

TEST_F(TestCast, ListToList) {
CastOptions options;
std::shared_ptr<Array> offsets;
Expand Down
11 changes: 11 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,17 @@ def test_cast_none():
arr.cast(None)


def test_cast_list_to_primitive():
    """Unsupported list -> non-list casts must raise ArrowInvalid.

    Regression test for ARROW-8070: cast segfaults on unsupported cast from
    list<binary> to utf8.
    """
    int_lists = pa.array([[1, 2], [3, 4]])
    with pytest.raises(pa.ArrowInvalid):
        int_lists.cast(pa.int8())

    binary_lists = pa.array([[b"a", b"b"], [b"c"]], pa.list_(pa.binary()))
    # Cover the originally reported target type (utf8) in addition to binary,
    # so the regression test exercises the list<binary> -> utf8 path itself.
    for target_type in (pa.binary(), pa.utf8()):
        with pytest.raises(pa.ArrowInvalid):
            binary_lists.cast(target_type)


def test_cast_chunked_array():
arrays = [pa.array([1, 2, 3]), pa.array([4, 5, 6])]
carr = pa.chunked_array(arrays)
Expand Down

0 comments on commit 8ee4be7

Please sign in to comment.