Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-8070: [C++] Cast segfaults on unsupported cast from list<binary> to utf8 #6738

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions cpp/src/arrow/compute/kernels/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1429,8 +1429,8 @@ Status GetListCastFunc(const DataType& in_type, std::shared_ptr<DataType> out_ty
const CastOptions& options,
std::unique_ptr<CastKernelBase>* kernel) {
if (out_type->id() != TypeClass::type_id) {
// Kernel will be null
return Status::OK();
return Status::Invalid("Cannot cast from ", in_type.ToString(), " to ",
out_type->ToString());
}
const DataType& in_value_type = *checked_cast<const TypeClass&>(in_type).value_type();
std::shared_ptr<DataType> out_value_type =
Expand Down Expand Up @@ -1503,19 +1503,18 @@ Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_ty
CAST_FUNCTION_CASE(LargeStringType);
CAST_FUNCTION_CASE(DictionaryType);
case Type::NA:
cast_kernel.reset(new FromNullCastKernel(std::move(out_type)));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the `std::move` because the validation below dereferences `out_type`.

cast_kernel.reset(new FromNullCastKernel(out_type));
break;
case Type::LIST:
RETURN_NOT_OK(
GetListCastFunc<ListType>(in_type, std::move(out_type), options, &cast_kernel));
RETURN_NOT_OK(GetListCastFunc<ListType>(in_type, out_type, options, &cast_kernel));
break;
case Type::LARGE_LIST:
RETURN_NOT_OK(GetListCastFunc<LargeListType>(in_type, std::move(out_type), options,
&cast_kernel));
RETURN_NOT_OK(
GetListCastFunc<LargeListType>(in_type, out_type, options, &cast_kernel));
break;
case Type::EXTENSION:
RETURN_NOT_OK(ExtensionCastKernel::Make(std::move(in_type), std::move(out_type),
options, &cast_kernel));
RETURN_NOT_OK(
ExtensionCastKernel::Make(std::move(in_type), out_type, options, &cast_kernel));
break;
default:
break;
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/compute/kernels/cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,17 @@ TEST_F(TestCast, BooleanToString) { TestCastBooleanToString<StringType>(); }

// Runs the templated boolean-to-string cast check, instantiated for
// LargeStringType.
TEST_F(TestCast, BooleanToLargeString) {
  TestCastBooleanToString<LargeStringType>();
}

// ARROW-8070: casting a list array to a non-list type is unsupported and is
// expected to be reported as an Invalid status.
TEST_F(TestCast, ListToPrimitive) {
  CastOptions options;
  std::shared_ptr<Array> result;

  // list<int8> -> uint8
  auto int_lists = ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]");
  // list<binary> -> utf8 (the combination named in the issue title)
  auto binary_lists =
      ArrayFromJSON(list(binary()), "[[\"1\", \"2\"], [\"3\", \"4\"]]");

  ASSERT_RAISES(Invalid, Cast(&ctx_, *int_lists, uint8(), options, &result));
  ASSERT_RAISES(Invalid, Cast(&ctx_, *binary_lists, utf8(), options, &result));
}

TEST_F(TestCast, ListToList) {
CastOptions options;
std::shared_ptr<Array> offsets;
Expand Down
11 changes: 11 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,17 @@ def test_cast_none():
arr.cast(None)


def test_cast_list_to_primitive():
    # ARROW-8070: cast segfaults on unsupported cast from list<binary> to utf8
    # Every cast below targets a non-list type from a list array and must be
    # rejected with ArrowInvalid.
    arr = pa.array([[1, 2], [3, 4]])
    with pytest.raises(pa.ArrowInvalid):
        arr.cast(pa.int8())

    arr = pa.array([[b"a", b"b"], [b"c"]], pa.list_(pa.binary()))
    with pytest.raises(pa.ArrowInvalid):
        arr.cast(pa.binary())
    # The exact combination from the report: list<binary> -> utf8.
    with pytest.raises(pa.ArrowInvalid):
        arr.cast(pa.utf8())


def test_cast_chunked_array():
arrays = [pa.array([1, 2, 3]), pa.array([4, 5, 6])]
carr = pa.chunked_array(arrays)
Expand Down