Skip to content

Commit

Permalink
ESQL: Nested expressions inside stats command (#104387)
Browse files Browse the repository at this point in the history
Allow nested expressions to be used both for grouping or inside
 aggregate functions inside the stats command.
As such the grammar has been tweaked to allow the stats group to have 
 optional aliasing.
As part of this fix, preserve the original field declaration (including 
 spaces) for implicit aliases.
Improve validation for incorrect aggregate function use (as arguments,
 grouping or inside evals).

Fix #99828
  • Loading branch information
costin committed Jan 17, 2024
1 parent 42bda44 commit 607185b
Show file tree
Hide file tree
Showing 22 changed files with 1,096 additions and 899 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/104387.yaml
@@ -0,0 +1,6 @@
pr: 104387
summary: "ESQL: Nested expressions inside stats command"
area: ES|QL
type: enhancement
issues:
- 99828
8 changes: 4 additions & 4 deletions docs/reference/esql/multivalued-fields.asciidoc
Expand Up @@ -201,8 +201,8 @@ POST /_query
"columns": [
{ "name": "a", "type": "long"},
{ "name": "b", "type": "long"},
{ "name": "b+2", "type": "long"},
{ "name": "a+b", "type": "long"}
{ "name": "b + 2", "type": "long"},
{ "name": "a + b", "type": "long"}
],
"values": [
[1, [1, 2], null, null],
Expand Down Expand Up @@ -236,8 +236,8 @@ POST /_query
"columns": [
{ "name": "a", "type": "long"},
{ "name": "b", "type": "long"},
{ "name": "b+2", "type": "long"},
{ "name": "a+b", "type": "long"}
{ "name": "b + 2", "type": "long"},
{ "name": "a + b", "type": "long"}
],
"values": [
[1, 1, 3, 2],
Expand Down
Expand Up @@ -319,7 +319,7 @@ Parto |Bamford |6.004230000000001
// end::evalReplace-result[]
;

docsEvalUnnamedColumn
docsEvalUnnamedColumn#[skip:-8.12.99,reason:expression spaces are maintained since 8.13]
// tag::evalUnnamedColumn[]
FROM employees
| SORT emp_no
Expand All @@ -329,7 +329,7 @@ FROM employees
| LIMIT 3;

// tag::evalUnnamedColumn-result[]
first_name:keyword | last_name:keyword | height:double | height*3.281:double
first_name:keyword | last_name:keyword | height:double | height * 3.281:double
Georgi |Facello |2.03 |6.66043
Bezalel |Simmel |2.08 |6.82448
Parto |Bamford |1.83 |6.004230000000001
Expand All @@ -348,4 +348,4 @@ FROM employees
avg_height_feet:double
5.801464200000001
// end::evalUnnamedColumnStats-result[]
;
;
Expand Up @@ -56,15 +56,15 @@ COUNT(emp_no):long
// end::is-not-null-result[]
;

coalesceSimple
coalesceSimple#[skip:-8.12.99,reason:expression spaces are maintained since 8.13]
// tag::coalesce[]
ROW a=null, b="b"
| EVAL COALESCE(a, b)
// end::coalesce[]
;

// tag::coalesce-result[]
a:null | b:keyword | COALESCE(a,b):keyword
a:null | b:keyword | COALESCE(a, b):keyword
null | b | b
// end::coalesce-result[]
;
Expand Down
Expand Up @@ -879,3 +879,60 @@ AVG(salary):double | avg_salary_rounded:double
48248.55 | 48249.0
// end::statsUnnamedColumnEval-result[]
;

nestedExpressionNoGrouping#[skip:-8.12.99,reason:StatsNestedExp breaks bwc]
FROM employees
| STATS s = SUM(emp_no + 3), c = COUNT(emp_no)
;

s: long | c: long
1005350 | 100
;

nestedExpressionInSurrogateAgg#[skip:-8.12.99,reason:StatsNestedExp breaks bwc]
FROM employees
| STATS a = AVG(emp_no % 5), s = SUM(emp_no % 5), c = COUNT(emp_no % 5)
;

a:double | s:long | c:long
2.0 | 200 | 100
;

nestedExpressionInGroupingWithAlias#[skip:-8.12.99,reason:StatsNestedExp breaks bwc]
FROM employees
| STATS s = SUM(emp_no % 5), c = COUNT(emp_no % 5) BY l = languages + 20
| SORT l
;

s:long | c:long | l : i
39 | 15 | 21
36 | 19 | 22
30 | 17 | 23
32 | 18 | 24
43 | 21 | 25
20 | 10 | null
;

nestedMultiExpressionInGroupingsAndAggs#[skip:-8.12.99,reason:StatsNestedExp breaks bwc]
FROM employees
| EVAL sal = salary + 10000
| STATS sum(sal), sum(salary + 10000) BY left(first_name, 1), concat(gender, to_string(languages))
| SORT `left(first_name, 1)`, `concat(gender, to_string(languages))`
| LIMIT 5
;

sum(sal):l | sum(salary + 10000):l | left(first_name, 1):s | concat(gender, to_string(languages)):s
54307 | 54307 | A | F2
70335 | 70335 | A | F3
76817 | 76817 | A | F5
123675 | 123675 | A | M3
43370 | 43370 | B | F2
;








Expand Up @@ -87,15 +87,15 @@ COUNT_DISTINCT(ip0):long | COUNT_DISTINCT(ip1):long
// end::count-distinct-result[]
;

countDistinctOfIpPrecision
countDistinctOfIpPrecision#[skip:-8.12.99,reason:expression spaces are maintained since 8.13]
// tag::count-distinct-precision[]
FROM hosts
| STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5)
// end::count-distinct-precision[]
;

// tag::count-distinct-precision-result[]
COUNT_DISTINCT(ip0,80000):long | COUNT_DISTINCT(ip1,5):long
COUNT_DISTINCT(ip0, 80000):long | COUNT_DISTINCT(ip1, 5):long
7 | 9
// end::count-distinct-precision-result[]
;
Expand Down
Expand Up @@ -77,15 +77,15 @@ m:double | p50:double
0 | 0
;

medianOfInteger#[skip:-8.11.99,reason:ReplaceDuplicateAggWithEval breaks bwc gh-103765]
medianOfInteger#[skip:-8.12.99,reason:ReplaceDuplicateAggWithEval breaks bwc gh-103765/Expression spaces are maintained since 8.13]
// tag::median[]
FROM employees
| STATS MEDIAN(salary), PERCENTILE(salary, 50)
// end::median[]
;

// tag::median-result[]
MEDIAN(salary):double | PERCENTILE(salary,50):double
MEDIAN(salary):double | PERCENTILE(salary, 50):double
47003 | 47003
// end::median-result[]
;
Expand Down
8 changes: 2 additions & 6 deletions x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4
Expand Up @@ -111,15 +111,11 @@ evalCommand
;

statsCommand
: STATS fields? (BY grouping)?
: STATS stats=fields? (BY grouping=fields)?
;

inlinestatsCommand
: INLINESTATS fields (BY grouping)?
;

grouping
: qualifiedName (COMMA qualifiedName)*
: INLINESTATS stats=fields (BY grouping=fields)?
;

fromIdentifier
Expand Down
Expand Up @@ -21,11 +21,8 @@
import org.elasticsearch.xpack.ql.common.Failure;
import org.elasticsearch.xpack.ql.expression.Alias;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.Literal;
import org.elasticsearch.xpack.ql.expression.MetadataAttribute;
import org.elasticsearch.xpack.ql.expression.Expressions;
import org.elasticsearch.xpack.ql.expression.NamedExpression;
import org.elasticsearch.xpack.ql.expression.ReferenceAttribute;
import org.elasticsearch.xpack.ql.expression.TypeResolutions;
import org.elasticsearch.xpack.ql.expression.UnresolvedAttribute;
import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction;
Expand Down Expand Up @@ -150,36 +147,39 @@ else if (p.resolved()) {

private static void checkAggregate(LogicalPlan p, Set<Failure> failures) {
if (p instanceof Aggregate agg) {
// check aggregates
agg.aggregates().forEach(e -> {
var exp = e instanceof Alias ? ((Alias) e).child() : e;
if (exp instanceof AggregateFunction aggFunc) {
Expression field = aggFunc.field();

// TODO: allow an expression?
if ((field instanceof FieldAttribute
|| field instanceof MetadataAttribute
|| field instanceof ReferenceAttribute
|| field instanceof Literal) == false) {
var exp = e instanceof Alias a ? a.child() : e;
if (exp instanceof AggregateFunction af) {
af.field().forEachDown(AggregateFunction.class, f -> {
failures.add(fail(f, "nested aggregations [{}] not allowed inside other aggregations [{}]", f, af));
});
} else {
if (Expressions.match(agg.groupings(), g -> {
Expression to = g instanceof Alias al ? al.child() : g;
return to.semanticEquals(exp);
}) == false) {
failures.add(
fail(
e,
"aggregate function's field must be an attribute or literal; found ["
+ field.sourceText()
exp,
"expected an aggregate function or group but got ["
+ exp.sourceText()
+ "] of type ["
+ field.nodeName()
+ exp.nodeName()
+ "]"
)
);
}
} else if (agg.groupings().contains(exp) == false) { // TODO: allow an expression?
failures.add(
fail(
exp,
"expected an aggregate function or group but got [" + exp.sourceText() + "] of type [" + exp.nodeName() + "]"
)
);
}
});

// check grouping
// The grouping can not be an aggregate function
agg.groupings().forEach(e -> e.forEachUp(g -> {
if (g instanceof AggregateFunction af) {
failures.add(fail(g, "cannot use an aggregate [{}] for grouping", af));
}
}));
}
}

Expand Down Expand Up @@ -214,12 +214,17 @@ private static void checkRow(LogicalPlan p, Set<Failure> failures) {
private static void checkEvalFields(LogicalPlan p, Set<Failure> failures) {
if (p instanceof Eval eval) {
eval.fields().forEach(field -> {
// check supported types
DataType dataType = field.dataType();
if (EsqlDataTypes.isRepresentable(dataType) == false) {
failures.add(
fail(field, "EVAL does not support type [{}] in expression [{}]", dataType.typeName(), field.child().sourceText())
);
}
// check no aggregate functions are used
field.forEachDown(AggregateFunction.class, af -> {
failures.add(fail(af, "aggregate function [{}] not allowed outside STATS command", af.sourceText()));
});
});
}
}
Expand Down

0 comments on commit 607185b

Please sign in to comment.