Skip to content

Commit

Permalink
feat: Implement equality = and inequality <> support for StringView (#…
Browse files Browse the repository at this point in the history
…10985)

* feat: Implement equality = and inequality <> support for StringView

* chore: Add tests for the StringView

* chore

* chore: Update tests for NULL

* fix: Used build_array_string!

* chore: Update string_coercion function to handle Utf8View type in binary.rs

* chore: add tests

* chore: ci
  • Loading branch information
Weijun-H committed Jun 19, 2024
1 parent 810cce7 commit 507d978
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 39 deletions.
24 changes: 12 additions & 12 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,15 @@ unused_imports = "deny"
## Temporary arrow-rs patch until 52.1.0 is released

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
30 changes: 15 additions & 15 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 11 additions & 11 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ rstest = "0.17"
## Temporary arrow-rs patch until 52.1.0 is released

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
2 changes: 1 addition & 1 deletion datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1570,6 +1570,7 @@ impl ScalarValue {
DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
DataType::Utf8 => build_array_string!(StringArray, Utf8),
DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
DataType::Binary => build_array_string!(BinaryArray, Binary),
Expand Down Expand Up @@ -1726,7 +1727,6 @@ impl ScalarValue {
| DataType::Time64(TimeUnit::Millisecond)
| DataType::Map(_, _)
| DataType::RunEndEncoded(_, _)
| DataType::Utf8View
| DataType::BinaryView
| DataType::ListView(_)
| DataType::LargeListView(_) => {
Expand Down
1 change: 1 addition & 0 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
(LargeUtf8, Utf8) => Some(LargeUtf8),
(Utf8, LargeUtf8) => Some(LargeUtf8),
(LargeUtf8, LargeUtf8) => Some(LargeUtf8),
(Utf8View, Utf8View) | (Utf8View, Utf8) | (Utf8, Utf8View) => Some(Utf8View),
_ => None,
}
}
Expand Down
113 changes: 113 additions & 0 deletions datafusion/sqllogictest/test_files/string_view.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


# test StringViewArray with Utf8View columns
#
# Fixture: both columns are built with arrow_cast(..., 'Utf8View'); the last
# row has a NULL in column1. Queries below exercise `=` and `<>` between
# scalars, between columns, and between a column and a scalar/literal.
statement ok
create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')),
(arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')),
(arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')),
(arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View'));

# NOTE: 'NULL' here is a quoted four-character string, not SQL NULL, so this
# compares two different non-null strings.
query B
select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
----
false

# Same quoted-string 'NULL' operand as above, with the inverse operator.
query B
select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
----
true

# scalar = scalar, identical values
query B
select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
----
true

# scalar <> scalar, different values
query B
select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
----
true

# column = column
query ??
select * from test where column1 = column2;
----
Xiangpeng Xiangpeng

# column <> column; the row whose column1 is NULL is not returned.
# rowsort: there is no ORDER BY, so compare rows in sorted order rather than
# relying on the order the engine happens to emit them in.
query ?? rowsort
select * from test where column1 <> column2;
----
Andrew X
Raphael R

# column = Utf8View scalar
query ??
select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
----
Andrew X

# column = plain string literal
query ??
select * from test where column1 = 'Andrew';
----
Andrew X

# column <> Utf8View scalar; the row whose column1 is NULL is not returned.
# rowsort: no ORDER BY, so expected rows are listed in sorted order.
query ?? rowsort
select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
----
Raphael R
Xiangpeng Xiangpeng

# column <> plain string literal; the row whose column1 is NULL is not returned.
# rowsort: no ORDER BY, so expected rows are listed in sorted order.
query ?? rowsort
select * from test where column1 <> 'Andrew';
----
Raphael R
Xiangpeng Xiangpeng

statement ok
drop table test;


# test StringViewArray with Utf8 and Utf8View columns
#
# Fixture: column1 holds plain string literals (reported with type letter T
# below), column2 is built with arrow_cast(..., 'Utf8View'); the last row has
# a NULL in column1. Queries below compare the two column types with `=`/`<>`.
statement ok
create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
('Raphael', arrow_cast('R', 'Utf8View')),
(NULL, arrow_cast('R', 'Utf8View'));

# plain string column = Utf8View column
query T?
select * from test where column1 = column2;
----
Xiangpeng Xiangpeng

# plain string column <> Utf8View column; the row whose column1 is NULL is not
# returned.
# rowsort: there is no ORDER BY, so compare rows in sorted order rather than
# relying on the order the engine happens to emit them in.
query T? rowsort
select * from test where column1 <> column2;
----
Andrew X
Raphael R

# plain string column = Utf8View scalar
query T?
select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
----
Andrew X

# plain string column <> Utf8View scalar; the row whose column1 is NULL is not
# returned.
# rowsort: no ORDER BY, so expected rows are listed in sorted order.
query T? rowsort
select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
----
Raphael R
Xiangpeng Xiangpeng

statement ok
drop table test;

0 comments on commit 507d978

Please sign in to comment.