Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESQL: Add support for TEXT fields in comparison operators and SORT #98528

Merged
merged 16 commits into from Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -21,13 +21,13 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
import org.elasticsearch.xpack.esql.expression.function.scalar.math.Abs;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.planner.EvalMapper;
import org.elasticsearch.xpack.esql.planner.Layout;
import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.Literal;
import org.elasticsearch.xpack.ql.expression.predicate.operator.arithmetic.Add;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;
import org.elasticsearch.xpack.ql.type.EsField;
Expand Down
@@ -0,0 +1,239 @@
---
setup:

- do:
indices.create:
index: test
body:
mappings:
properties:
"emp_no":
type: long
name:
type: keyword
job:
type: text
fields:
raw:
type: keyword
tag:
type: text

- do:
bulk:
index: test
refresh: true
body:
- { "index": { } }
- { "emp_no": 10, "name": "Jenny", "job": "IT Director", "tag": "foo bar" }
- { "index": { } }
- { "emp_no": 20, "name": "John", "job": "Payroll Specialist", "tag": "baz" }
---
"filter by text":
- do:
esql.query:
body:
query: 'from test | where tag == "baz" | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }

---
"like by text":
- do:
esql.query:
body:
query: 'from test | where tag LIKE "*az" | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }

---
"rlike by text":
- do:
esql.query:
body:
query: 'from test | where tag RLIKE ".*az" | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }

---
"eval and filter text":
- do:
esql.query:
body:
query: 'from test | eval x = tag | where x == "baz" | keep emp_no, name, job, x'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "x" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }

---
"filter on text multi-field":
- do:
esql.query:
body:
query: 'from test | where job == "IT Director" | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 10, "Jenny", "IT Director", "foo bar"] }

---
"like by multi-field text":
- do:
esql.query:
body:
query: 'from test | where job LIKE "*Specialist" | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }

---
"rlike by multi-field text":
- do:
esql.query:
body:
query: 'from test | where job RLIKE ".*Specialist" | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 1 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }


---
"sort by text":
- do:
esql.query:
body:
query: 'from test | sort tag | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 2 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }
- match: { values.1: [ 10, "Jenny", "IT Director", "foo bar"] }


---
"sort by text multi-field":
- do:
esql.query:
body:
query: 'from test | sort job | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 2 }
- match: { values.0: [ 10, "Jenny", "IT Director", "foo bar"] }
- match: { values.1: [ 20, "John", "Payroll Specialist", "baz"] }

---
"sort by text multi-field desc":
- do:
esql.query:
body:
query: 'from test | sort job desc | keep emp_no, name, job, tag'

- match: { columns.0.name: "emp_no" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "name" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "job" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "tag" }
- match: { columns.3.type: "text" }

- length: { values: 2 }
- match: { values.0: [ 20, "John", "Payroll Specialist", "baz"] }
- match: { values.1: [ 10, "Jenny", "IT Director", "foo bar"] }


---
"text in functions":
- do:
esql.query:
body:
query: 'from test | sort name | eval description = concat(name, " - ", job) | keep description'

- match: { columns.0.name: "description" }
- match: { columns.0.type: "keyword" }

- length: { values: 2 }
- match: { values.0: [ "Jenny - IT Director"] }
- match: { values.1: [ "John - Payroll Specialist"] }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This rule made sense before since the comparison classes were part of QL - now that we have our own classes, the type validation should be within the comparison classes themselves.
There's no advantage of it being outside rather it's confusing since the classes inherit the QL behavior which is used inside resolution and then complemented through the Verifier, which is both redundant and error-prone (the type resolution and Verifier need to be kept in sync and not trip over one another).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did some tests, moving this logic inside the resolution flow; the fix is not complex, but it has some practical implications:

  • the Analyzer does some automatic conversion (KEYWORD->DATETIME), that happen before this rule, and only if the expressions are foldable. At the same time, the resolution is used before the conversion (eg. in ResolveFunctions rule) and fails if we keep the logic as it is. So we need to review the logic a bit.
  • AbstractBinaryComparisonTestCase does not consider these automatic conversions (and today it makes no distinction between foldable and non-foldable expressions, see above), so we have to review that as well.

If you don't mind, I'd prefer to address this problem with a follow-up PR and discuss the changes separately

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WFM - please raise an issue that explains the follow-up items and link it here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expand Up @@ -8,6 +8,8 @@
package org.elasticsearch.xpack.esql.analysis;

import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NotEquals;
import org.elasticsearch.xpack.esql.plan.logical.Dissect;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Grok;
Expand All @@ -28,8 +30,6 @@
import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.ql.expression.predicate.BinaryOperator;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.NotEquals;
import org.elasticsearch.xpack.ql.plan.logical.Aggregate;
import org.elasticsearch.xpack.ql.plan.logical.Filter;
import org.elasticsearch.xpack.ql.plan.logical.Limit;
Expand Down Expand Up @@ -258,6 +258,9 @@ public static Failure validateBinaryComparison(BinaryComparison bc) {
if (false == r.resolved()) {
return fail(bc, r.message());
}
if (DataTypes.isString(bc.left().dataType()) && DataTypes.isString(bc.right().dataType())) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? Aren't the operators performing their own, full type resolution?
In fact why still have this rule in place?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment above #98528 (comment)

return null;
}
if (bc.left().dataType() != bc.right().dataType()) {
return fail(
bc,
Expand Down
@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression;

import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;

import java.util.List;

public class Order extends org.elasticsearch.xpack.ql.expression.Order {
public Order(Source source, Expression child, OrderDirection direction, NullsPosition nulls) {
super(source, child, direction, nulls);
}

@Override
protected TypeResolution resolveType() {
if (DataTypes.isString(child().dataType())) {
return TypeResolution.TYPE_RESOLVED;
}
return super.resolveType();
}

@Override
public Order replaceChildren(List<Expression> newChildren) {
return new Order(source(), newChildren.get(0), direction(), nullsPosition());
}

@Override
protected NodeInfo<org.elasticsearch.xpack.ql.expression.Order> info() {
return NodeInfo.create(this, Order::new, child(), direction(), nullsPosition());
}

}
Expand Up @@ -8,8 +8,53 @@

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.TypeResolutions;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;

import java.time.ZoneId;

public class Equals extends org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Equals {
public Equals(Source source, Expression left, Expression right) {
super(source, left, right);
}

public Equals(Source source, Expression left, Expression right, ZoneId zoneId) {
super(source, left, right, zoneId);
}

@Override
protected TypeResolution resolveInputType(Expression e, TypeResolutions.ParamOrdinal paramOrdinal) {
if (e instanceof FieldAttribute fa && fa.dataType() == DataTypes.TEXT) {
return TypeResolution.TYPE_RESOLVED;
}
return super.resolveInputType(e, paramOrdinal);
}

@Override
protected NodeInfo<org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Equals> info() {
return NodeInfo.create(this, Equals::new, left(), right(), zoneId());
}

@Override
protected Equals replaceChildren(Expression newLeft, Expression newRight) {
return new Equals(source(), newLeft, newRight, zoneId());
}

@Override
public Equals swapLeftAndRight() {
return new Equals(source(), right(), left(), zoneId());
}

@Override
public BinaryComparison negate() {
return new NotEquals(source(), left(), right(), zoneId());
}

public class Equals {
@Evaluator(extraName = "Ints")
static boolean processInts(int lhs, int rhs) {
return lhs == rhs;
Expand Down