elastic · costin · Dec 20, 2018 · Dec 18, 2018 · Dec 20, 2018 · Dec 20, 2018
diff --git a/docs/reference/sql/functions/grouping.asciidoc b/docs/reference/sql/functions/grouping.asciidoc
@@ -36,6 +36,8 @@ The histogram function takes all matching values and divides them into buckets w
 bucket_key = Math.floor(value / interval) * interval
 ----
 
+NOTE:: The histogram in SQL does *NOT* return empty buckets for missing intervals as the traditional <<search-aggregations-bucket-histogram-aggregation, histogram>> and  <<search-aggregations-bucket-datehistogram-aggregation, date histogram>>. Such behavior does not fit conceptually in SQL which treats all missing values as `NULL`; as such the histogram places all missing values in the `NULL` group.
+
 `Histogram` can be applied on either numeric fields:
 
 
@@ -51,4 +53,26 @@ or date/time fields:
 include-tagged::{sql-specs}/docs.csv-spec[histogramDate]
 ----
 
+Expressions inside the histogram are also supported as long as the
+return type is numeric:
+
+["source","sql",subs="attributes,callouts,macros"]
+----
+include-tagged::{sql-specs}/docs.csv-spec[histogramNumericExpression]
+----
+
+Do note that histograms (and grouping functions in general) allow custom expressions but cannot have any functions applied to them in the `GROUP BY`.In other words, the following statement is *NOT* allowed:
 
+["source","sql",subs="attributes,callouts,macros"]
+----
+include-tagged::{sql-specs}/docs.csv-spec[expressionOnHistogramNotAllowed]
+----
+
+as it requires two groupings (one for histogram and then another for apply the function on top of the histogram groups).
+
+Instead one can rewrite the query to move the expression on the histogram _inside_ of it:
+
+["source","sql",subs="attributes,callouts,macros"]
+----
+include-tagged::{sql-specs}/docs.csv-spec[histogramDateExpression]
+----
diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec
@@ -262,9 +262,51 @@ SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp
 null                |10   
 ;
 
-histogramDateWithDateFunction-Ignore
-SELECT YEAR(HISTOGRAM(birth_date, INTERVAL 1 YEAR)) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
+histogramDateWithMonthOnTop
+schema::h:i|c:l
+SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
+
+       h       |       c       
+---------------+---------------
+12             |7              
+10             |17             
+8              |16             
+6              |16             
+4              |18             
+2              |10             
+0              |6              
+null           |10    
+;
+
+histogramDateWithYearOnTop
+schema::h:i|c:l
+SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
+       h       |       c       
+---------------+---------------
+1964           |5              
+1962           |13             
+1960           |16             
+1958           |16             
+1956           |9              
+1954           |12             
+1952           |19             
+null           |10   
+;
 
-
-
+histogramNumericWithExpression
+schema::h:i|c:l
+SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
+
+       h       |       c       
+---------------+---------------
+90             |10             
+80             |10             
+70             |10             
+60             |10             
+50             |10             
+40             |10             
+30             |10             
+20             |10             
+10             |10             
+0              |10   
 ;
diff --git a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec
@@ -725,6 +725,27 @@ SELECT HISTOGRAM(salary, 5000) AS h FROM emp GROUP BY h;
 // end::histogramNumeric  
 ;
 
+histogramNumericExpression
+schema::h:i|c:l
+// tag::histogramNumericExpression
+SELECT HISTOGRAM(salary % 100, 10) AS h, COUNT(*) AS c FROM emp GROUP BY h;
+
+       h       |       c       
+---------------+---------------
+0              |10             
+10             |15             
+20             |10             
+30             |14             
+40             |9              
+50             |9              
+60             |8              
+70             |13             
+80             |3              
+90             |9    
+
+// end::histogramNumericExpression  
+;
+
 histogramDate
 schema::h:ts|c:l
 // tag::histogramDate
@@ -752,6 +773,30 @@ null                |10
 // end::histogramDate
 ;
 
+expressionOnHistogramNotAllowed-Ignore
+// tag::expressionOnHistogramNotAllowed
+SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM emp GROUP BY h ORDER BY h DESC;
+// end::expressionOnHistogramNotAllowed
+
+histogramDateExpression
+schema::h:i|c:l
+// tag::histogramDateExpression
+SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM emp GROUP BY h ORDER BY h DESC;
+
+       h       |       c       
+---------------+---------------
+12             |7              
+10             |17             
+8              |16             
+6              |16             
+4              |18             
+2              |10             
+0              |6              
+null           |10 
+
+// end::histogramDateExpression   
+;
+
 ///////////////////////////////
 //
 // Date/Time

diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java
@@ -18,6 +18,8 @@
 import org.elasticsearch.xpack.sql.expression.function.FunctionAttribute;
 import org.elasticsearch.xpack.sql.expression.function.Functions;
 import org.elasticsearch.xpack.sql.expression.function.Score;
+import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
+import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
 import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction;
 import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction;
 import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In;
@@ -224,6 +226,7 @@ Collection<Failure> verify(LogicalPlan plan) {
                 validateConditional(p, localFailures);
 
                 checkFilterOnAggs(p, localFailures);
+                checkFilterOnGrouping(p, localFailures);
 
                 if (!groupingFailures.contains(p)) {
                     checkGroupBy(p, localFailures, resolvedFunctions, groupingFailures);
@@ -419,7 +422,7 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> sourc
             return true;
         }
         // skip aggs (allowed to refer to non-group columns)
-        if (Functions.isAggregate(e)) {
+        if (Functions.isAggregate(e) || Functions.isGrouping(e)) {
             return true;
         }
 
@@ -448,6 +451,21 @@ private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures
                 }
             }));
 
+            a.groupings().forEach(e -> {
+                if (Functions.isGrouping(e) == false) {
+                    e.collectFirstChildren(c -> {
+                        if (Functions.isGrouping(c)) {
+                            localFailures.add(fail(c,
+                                    "Cannot combine [%s] grouping function inside GROUP BY, found [%s];"
+                                            + " consider moving the expression inside the histogram",
+                                    Expressions.name(c), Expressions.name(e)));
+                            return true;
+                        }
+                        return false;
+                    });
+                }
+            });
+
             if (!localFailures.isEmpty()) {
                 return false;
             }
@@ -547,19 +565,30 @@ private static void checkFilterOnAggs(LogicalPlan p, Set<Failure> localFailures)
         if (p instanceof Filter) {
             Filter filter = (Filter) p;
             if ((filter.child() instanceof Aggregate) == false) {
-                filter.condition().forEachDown(f -> {
-                    if (Functions.isAggregate(f) || Functions.isGrouping(f)) {
-                        String type = Functions.isAggregate(f) ? "aggregate" : "grouping";
-                        localFailures.add(fail(f,
-                                "Cannot use WHERE filtering on %s function [%s], use HAVING instead", type, Expressions.name(f)));
+                filter.condition().forEachDown(e -> {
+                    if (Functions.isAggregate(e) || e instanceof AggregateFunctionAttribute) {
+                        localFailures.add(
+                                fail(e, "Cannot use WHERE filtering on aggregate function [%s], use HAVING instead", Expressions.name(e)));
                     }
-
-                }, Function.class);
+                }, Expression.class);
             }
         }
     }
 
 
+    private static void checkFilterOnGrouping(LogicalPlan p, Set<Failure> localFailures) {
+        if (p instanceof Filter) {
+            Filter filter = (Filter) p;
+            filter.condition().forEachDown(e -> {
+                if (Functions.isGrouping(e) || e instanceof GroupingFunctionAttribute) {
+                    localFailures
+                            .add(fail(e, "Cannot filter on grouping function [%s], use the grouped field instead", Expressions.name(e)));
+                }
+            }, Expression.class);
+        }
+    }
+
+
     private static void checkForScoreInsideFunctions(LogicalPlan p, Set<Failure> localFailures) {
         // Make sure that SCORE is only used in "top level" functions
         p.forEachExpressions(e ->
@@ -647,4 +676,4 @@ private static boolean areTypesCompatible(DataType left, DataType right) {
                 (left.isNumeric() && right.isNumeric());
         }
     }
-}
+}
diff --git a/.../elasticsearch/xpack/sql/expression/function/scalar/whitelist/InternalSqlScriptUtils.java b/.../elasticsearch/xpack/sql/expression/function/scalar/whitelist/InternalSqlScriptUtils.java
@@ -346,6 +346,9 @@ public static Integer weekOfYear(Object dateTime, String tzId) {
     }
 
     public static ZonedDateTime asDateTime(Object dateTime) {
+        if (dateTime == null) {
+            return null;
+        }
         if (dateTime instanceof JodaCompatibleZonedDateTime) {
             return ((JodaCompatibleZonedDateTime) dateTime).getZonedDateTime();
         }

diff --git a/.../plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/Grouping.java b/.../plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/Grouping.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.sql.expression.gen.script;
+
+import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
+
+class Grouping extends Param<GroupingFunctionAttribute> {
+
+    Grouping(GroupingFunctionAttribute groupRef) {
+        super(groupRef);
+    }
+
+    String groupName() {
+        return value().functionId();
+    }
+
+    @Override
+    public String prefix() {
+        return "g";
+    }
+}
diff --git a/...ck/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/Params.java b/...ck/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/Params.java
@@ -85,25 +85,15 @@ Map<String, String> asAggPaths() {
                 String s = a.aggProperty() != null ? a.aggProperty() : a.aggName();
                 map.put(p.prefix() + aggs++, s);
             }
-        }
-
-        return map;
-    }
-
-    // return the agg refs
-    List<String> asAggRefs() {
-        List<String> refs = new ArrayList<>();
-
-        for (Param<?> p : params) {
-            if (p instanceof Agg) {
-                refs.add(((Agg) p).aggName());
+            if (p instanceof Grouping) {
+                Grouping g = (Grouping) p;
+                map.put(p.prefix() + aggs++, g.groupName());
             }
         }
 
-        return refs;
+        return map;
     }
 
-
     private static List<Param<?>> flatten(List<Param<?>> params) {
         List<Param<?>> flatten = emptyList();
 
@@ -116,6 +106,9 @@ private static List<Param<?>> flatten(List<Param<?>> params) {
                 else if (p instanceof Agg) {
                     flatten.add(p);
                 }
+                else if (p instanceof Grouping) {
+                    flatten.add(p);
+                }
                 else if (p instanceof Var) {
                     flatten.add(p);
                 }
@@ -131,4 +124,4 @@ else if (p instanceof Var) {
     public String toString() {
         return params.toString();
     }
-}
+}
diff --git a/...in/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ParamsBuilder.java b/...in/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ParamsBuilder.java
@@ -6,6 +6,7 @@
 package org.elasticsearch.xpack.sql.expression.gen.script;
 
 import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
+import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -28,6 +29,11 @@ public ParamsBuilder agg(AggregateFunctionAttribute agg) {
         return this;
     }
 
+    public ParamsBuilder grouping(GroupingFunctionAttribute grouping) {
+        params.add(new Grouping(grouping));
+        return this;
+    }
+
     public ParamsBuilder script(Params ps) {
         params.add(new Script(ps));
         return this;

diff --git a/...n/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptTemplate.java b/...n/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptTemplate.java
@@ -44,10 +44,6 @@ public Params params() {
         return params;
     }
 
-    public List<String> aggRefs() {
-        return params.asAggRefs();
-    }
-
     public Map<String, String> aggPaths() {
         return params.asAggPaths();
     }

diff --git a/...gin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptWeaver.java b/...gin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptWeaver.java
@@ -12,6 +12,7 @@
 import org.elasticsearch.xpack.sql.expression.Expressions;
 import org.elasticsearch.xpack.sql.expression.FieldAttribute;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
+import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
 import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunctionAttribute;
 import org.elasticsearch.xpack.sql.expression.literal.IntervalDayTime;
 import org.elasticsearch.xpack.sql.expression.literal.IntervalYearMonth;
@@ -37,6 +38,9 @@ default ScriptTemplate asScript(Expression exp) {
             if (attr instanceof AggregateFunctionAttribute) {
                 return scriptWithAggregate((AggregateFunctionAttribute) attr);
             }
+            if (attr instanceof GroupingFunctionAttribute) {
+                return scriptWithGrouping((GroupingFunctionAttribute) attr);
+            }
             if (attr instanceof FieldAttribute) {
                 return scriptWithField((FieldAttribute) attr);
             }
@@ -83,6 +87,16 @@ default ScriptTemplate scriptWithAggregate(AggregateFunctionAttribute aggregate)
                 dataType());
     }
 
+    default ScriptTemplate scriptWithGrouping(GroupingFunctionAttribute grouping) {
+        String template = "{}";
+        if (grouping.dataType() == DataType.DATE) {
+            template = "{sql}.asDateTime({})";
+        }
+        return new ScriptTemplate(processScript(template),
+                paramsBuilder().grouping(grouping).build(),
+                dataType());
+    }
+
     default ScriptTemplate scriptWithField(FieldAttribute field) {
         return new ScriptTemplate(processScript("doc[{}].value"),
                 paramsBuilder().variable(field.name()).build(),