Skip to content

Commit

Permalink
add ILIKE support (#4639)
Browse files Browse the repository at this point in the history
* test: add `LIKE` sqllogictest

* feat: support `[NOT ]ILIKE`
  • Loading branch information
crepererum committed Dec 15, 2022
1 parent 2466877 commit c132eca
Show file tree
Hide file tree
Showing 11 changed files with 245 additions and 8 deletions.
6 changes: 6 additions & 0 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1946,6 +1946,12 @@ mod tests {
col("c2").and(bool_expr),
// utf8 LIKE u32
col("c1").like(col("c2")),
// utf8 NOT LIKE u32
col("c1").not_like(col("c2")),
// utf8 ILIKE u32
col("c1").ilike(col("c2")),
// utf8 NOT ILIKE u32
col("c1").not_ilike(col("c2")),
];
for case in cases {
let logical_plan = test_csv_scan().await?.project(vec![case.clone()]);
Expand Down
80 changes: 80 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/strings.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

statement ok
CREATE TABLE test(
s TEXT,
) as VALUES
('p1'),
('p1e1'),
('p1m1e1'),
('P1'),
('P1e1'),
('P1m1e1'),
('e1'),
('p2'),
('p2e1'),
('p2m1e1')
;

# LIKE
query T
SELECT s FROM test WHERE s LIKE 'p1%';
----
p1
p1e1
p1m1e1

query T
SELECT s FROM test WHERE s LIKE '%m1%';
----
p1m1e1
P1m1e1
p2m1e1

# NOT LIKE
query T
SELECT s FROM test WHERE s NOT LIKE 'p1%';
----
P1
P1e1
P1m1e1
e1
p2
p2e1
p2m1e1


# ILIKE
query T
SELECT s FROM test WHERE s ILIKE 'p1%';
----
p1
p1e1
p1m1e1
P1
P1e1
P1m1e1

# NOT ILIKE
query T
SELECT s FROM test WHERE s NOT ILIKE 'p1%';
----
e1
p2
p2e1
p2m1e1
27 changes: 25 additions & 2 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,9 @@ impl BinaryExpr {
match self.op {
Operator::Or => 5,
Operator::And => 10,
Operator::Like | Operator::NotLike => 19,
Operator::Like | Operator::NotLike | Operator::ILike | Operator::NotILike => {
19
}
Operator::NotEq
| Operator::Eq
| Operator::Lt
Expand All @@ -286,7 +288,18 @@ impl BinaryExpr {
| Operator::GtEq => 20,
Operator::Plus | Operator::Minus => 30,
Operator::Multiply | Operator::Divide | Operator::Modulo => 40,
_ => 0,
Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::RegexMatch
| Operator::RegexNotMatch
| Operator::RegexIMatch
| Operator::RegexNotIMatch
| Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseShiftLeft
| Operator::BitwiseShiftRight
| Operator::BitwiseXor
| Operator::StringConcat => 0,
}
}
}
Expand Down Expand Up @@ -611,6 +624,16 @@ impl Expr {
binary_expr(self, Operator::NotLike, other)
}

/// Build the binary expression `self ILIKE other`, i.e. a wildcard
/// pattern match that ignores case.
pub fn ilike(self, other: Expr) -> Expr {
    let op = Operator::ILike;
    binary_expr(self, op, other)
}

/// Build the binary expression `self NOT ILIKE other`, i.e. the negation
/// of a wildcard pattern match that ignores case.
pub fn not_ilike(self, other: Expr) -> Expr {
    let op = Operator::NotILike;
    binary_expr(self, op, other)
}

/// Return `self AS name` alias expression
pub fn alias(self, name: impl Into<String>) -> Expr {
Expr::Alias(Box::new(self), name.into())
Expand Down
10 changes: 10 additions & 0 deletions datafusion/expr/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ pub enum Operator {
Like,
/// Does not match a wildcard pattern
NotLike,
/// Matches a wildcard pattern, ignores case
ILike,
/// Does not match a wildcard pattern, ignores case
NotILike,
/// IS DISTINCT FROM
IsDistinctFrom,
/// IS NOT DISTINCT FROM
Expand Down Expand Up @@ -94,6 +98,8 @@ impl Operator {
Operator::GtEq => Some(Operator::Lt),
Operator::Like => Some(Operator::NotLike),
Operator::NotLike => Some(Operator::Like),
Operator::ILike => Some(Operator::NotILike),
Operator::NotILike => Some(Operator::ILike),
Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom),
Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom),
Operator::Plus
Expand Down Expand Up @@ -129,6 +135,8 @@ impl Operator {
Operator::GtEq => Some(Operator::LtEq),
Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::Plus
Expand Down Expand Up @@ -170,6 +178,8 @@ impl fmt::Display for Operator {
Operator::Or => "OR",
Operator::Like => "LIKE",
Operator::NotLike => "NOT LIKE",
Operator::ILike => "ILIKE",
Operator::NotILike => "NOT ILIKE",
Operator::RegexMatch => "~",
Operator::RegexIMatch => "~*",
Operator::RegexNotMatch => "!~",
Expand Down
24 changes: 23 additions & 1 deletion datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ pub fn binary_operator_data_type(
| Operator::Or
| Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::Lt
| Operator::Gt
| Operator::GtEq
Expand Down Expand Up @@ -116,7 +118,9 @@ pub fn coerce_types(
| Operator::GtEq
| Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
// "like" operators operate on strings and always return a boolean
Operator::Like | Operator::NotLike => like_coercion(lhs_type, rhs_type),
Operator::Like | Operator::NotLike | Operator::ILike | Operator::NotILike => {
like_coercion(lhs_type, rhs_type)
}
// date +/- interval returns date
Operator::Plus | Operator::Minus
if (*lhs_type == DataType::Date32
Expand Down Expand Up @@ -852,6 +856,24 @@ mod tests {
Operator::Like,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::NotLike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::ILike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::NotILike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Date32,
Expand Down
20 changes: 20 additions & 0 deletions datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,26 @@ mod tests {
assert_optimized_plan_eq(&plan, expected)
}

/// Checks that wrapping an `ILIKE` predicate in `NOT` yields a plan whose
/// filter is displayed as a single `NOT ILIKE` operator.
#[test]
fn simplify_not_ilike() -> Result<()> {
    // Two Utf8 columns: `a` is the value, `b` is used as the pattern.
    let fields = vec![
        Field::new("a", DataType::Utf8, false),
        Field::new("b", DataType::Utf8, false),
    ];
    let schema = Schema::new(fields);
    let scan = table_scan(Some("test"), &schema, None)
        .expect("creating scan")
        .build()
        .expect("building plan");

    // Negate the ILIKE predicate with an explicit `NOT`.
    let predicate = col("a").ilike(col("b")).not();
    let plan = LogicalPlanBuilder::from(scan).filter(predicate)?.build()?;

    let expected = "Filter: test.a NOT ILIKE test.b\
    \n TableScan: test";

    assert_optimized_plan_eq(&plan, expected)
}

#[test]
fn simplify_not_distinct_from() -> Result<()> {
let table_scan = test_table_scan();
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ impl ExprRewriter for TypeCoercionRewriter {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::Like, &right_type)?;
coerce_types(&left_type, &Operator::ILike, &right_type)?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::ILike(Like::new(negated, expr, pattern, escape_char));
Expand Down
60 changes: 56 additions & 4 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ use arrow::compute::kernels::comparison::{
use arrow::compute::kernels::comparison::{
eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar,
};
use arrow::compute::kernels::comparison::{like_utf8, nlike_utf8, regexp_is_match_utf8};
use arrow::compute::kernels::comparison::{
like_utf8_scalar, nlike_utf8_scalar, regexp_is_match_utf8_scalar,
ilike_utf8, like_utf8, nilike_utf8, nlike_utf8, regexp_is_match_utf8,
};
use arrow::compute::kernels::comparison::{
ilike_utf8_scalar, like_utf8_scalar, nilike_utf8_scalar, nlike_utf8_scalar,
regexp_is_match_utf8_scalar,
};

use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn};
Expand Down Expand Up @@ -944,6 +947,12 @@ impl BinaryExpr {
Operator::NotLike => {
binary_string_array_op_scalar!(array, scalar.clone(), nlike, bool_type)
}
Operator::ILike => {
binary_string_array_op_scalar!(array, scalar.clone(), ilike, bool_type)
}
Operator::NotILike => {
binary_string_array_op_scalar!(array, scalar.clone(), nilike, bool_type)
}
Operator::Plus => {
binary_primitive_array_op_scalar!(array, scalar.clone(), add)
}
Expand Down Expand Up @@ -1046,6 +1055,8 @@ impl BinaryExpr {
match &self.op {
Operator::Like => binary_string_array_op!(left, right, like),
Operator::NotLike => binary_string_array_op!(left, right, nlike),
Operator::ILike => binary_string_array_op!(left, right, ilike),
Operator::NotILike => binary_string_array_op!(left, right, nilike),
Operator::Lt => lt_dyn(&left, &right),
Operator::LtEq => lt_eq_dyn(&left, &right),
Operator::Gt => gt_dyn(&left, &right),
Expand Down Expand Up @@ -1275,7 +1286,12 @@ mod tests {
.expect("failed to downcast");
// verify that the result itself is correct
for (i, x) in $VEC.iter().enumerate() {
assert_eq!(result.value(i), *x);
let v = result.value(i);
assert_eq!(
v,
*x,
"Unexpected output at position {i}:\n\nActual:\n{v}\n\nExpected:\n{x}"
);
}
}};
}
Expand Down Expand Up @@ -1342,6 +1358,42 @@ mod tests {
DataType::Boolean,
vec![true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hello world", "world"],
StringArray,
DataType::Utf8,
vec!["%hello%", "%hello%"],
Operator::NotLike,
BooleanArray,
DataType::Boolean,
vec![false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hEllo world", "world"],
StringArray,
DataType::Utf8,
vec!["%helLo%", "%helLo%"],
Operator::ILike,
BooleanArray,
DataType::Boolean,
vec![true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hEllo world", "world"],
StringArray,
DataType::Utf8,
vec!["%helLo%", "%helLo%"],
Operator::NotILike,
BooleanArray,
DataType::Boolean,
vec![false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
Expand Down Expand Up @@ -3083,7 +3135,7 @@ mod tests {
min_value: ScalarValue,
max_value: ScalarValue,
) -> (Schema, Statistics) {
assert!(min_value.get_datatype() == max_value.get_datatype());
assert_eq!(min_value.get_datatype(), max_value.get_datatype());
let schema = Schema::new(vec![Field::new("a", min_value.get_datatype(), false)]);
let columns = vec![ColumnStatistics {
min_value: Some(min_value),
Expand Down
20 changes: 20 additions & 0 deletions datafusion/physical-expr/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,26 @@ pub fn create_physical_expr(
binary_expr(expr.as_ref().clone(), op, pattern.as_ref().clone());
create_physical_expr(&bin_expr, input_dfschema, input_schema, execution_props)
}
Expr::ILike(Like {
negated,
expr,
pattern,
escape_char,
}) => {
if escape_char.is_some() {
return Err(DataFusionError::Execution(
"ILIKE does not support escape_char".to_string(),
));
}
let op = if *negated {
Operator::NotILike
} else {
Operator::ILike
};
let bin_expr =
binary_expr(expr.as_ref().clone(), op, pattern.as_ref().clone());
create_physical_expr(&bin_expr, input_dfschema, input_schema, execution_props)
}
Expr::Case(case) => {
let expr: Option<Arc<dyn PhysicalExpr>> = if let Some(e) = &case.expr {
Some(create_physical_expr(
Expand Down
2 changes: 2 additions & 0 deletions datafusion/proto/src/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1358,6 +1358,8 @@ pub fn from_proto_binary_op(op: &str) -> Result<Operator, Error> {
"Modulo" => Ok(Operator::Modulo),
"Like" => Ok(Operator::Like),
"NotLike" => Ok(Operator::NotLike),
"ILike" => Ok(Operator::ILike),
"NotILike" => Ok(Operator::NotILike),
"IsDistinctFrom" => Ok(Operator::IsDistinctFrom),
"IsNotDistinctFrom" => Ok(Operator::IsNotDistinctFrom),
"BitwiseAnd" => Ok(Operator::BitwiseAnd),
Expand Down
Loading

0 comments on commit c132eca

Please sign in to comment.