apache · gianm · Jan 16, 2020 · Jan 1, 2020 · Jan 6, 2020 · Jan 16, 2020
diff --git a/processing/src/main/java/org/apache/druid/query/BaseQuery.java b/processing/src/main/java/org/apache/druid/query/BaseQuery.java
@@ -230,6 +230,7 @@ public Ordering<T> getResultOrdering()
     return descending ? retVal.reverse() : retVal;
   }
 
+  @Nullable
   @Override
   public String getId()
   {

diff --git a/processing/src/main/java/org/apache/druid/query/Query.java b/processing/src/main/java/org/apache/druid/query/Query.java
@@ -120,6 +120,7 @@ public interface Query<T>
 
   Query<T> withId(String id);
 
+  @Nullable
   String getId();
 
   default Query<T> withSqlQueryId(String sqlQueryId)

diff --git a/processing/src/main/java/org/apache/druid/query/QueryToolChest.java b/processing/src/main/java/org/apache/druid/query/QueryToolChest.java
@@ -26,6 +26,7 @@
 import com.google.common.base.Function;
 import org.apache.druid.guice.annotations.ExtensionPoint;
 import org.apache.druid.java.util.common.UOE;
+import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.query.aggregation.MetricManipulationFn;
 import org.apache.druid.timeline.LogicalSegment;
 
@@ -138,7 +139,7 @@ public Comparator<ResultType> createResultComparator(Query<ResultType> query)
    * to allow for query-specific dimensions and metrics.  That is, the ToolChest is expected to set some
    * meaningful dimensions for metrics given this query type.  Examples might be the topN threshold for
    * a TopN query or the number of dimensions included for a groupBy query.
-   * 
+   *
    * <p>QueryToolChests for query types in core (druid-processing) and public extensions (belonging to the Druid source
    * tree) should use delegate this method to {@link GenericQueryMetricsFactory#makeMetrics(Query)} on an injected
    * instance of {@link GenericQueryMetricsFactory}, as long as they don't need to emit custom dimensions and/or
@@ -269,4 +270,50 @@ public <T extends LogicalSegment> List<T> filterSegments(QueryType query, List<T
   {
     return segments;
   }
+
+  /**
+   * Returns a list of field names in the order that {@link #resultsAsArrays} would return them. The returned list will
+   * be the same length as each array returned by {@link #resultsAsArrays}. This base implementation always throws.
+   *
+   * @param query same query passed to {@link #resultsAsArrays}
+   *
+   * @return list of field names
+   *
+   * @throws UnsupportedOperationException if this query type does not support returning results as arrays
+   */
+  public List<String> resultArrayFields(QueryType query)
+  {
+    throw new UOE("Query type '%s' does not support returning results as arrays", query.getType());
+  }
+
+  /**
+   * Converts a sequence of this query's ResultType into arrays. The array schema is given by
+   * {@link #resultArrayFields}. This functionality is useful because it allows higher-level processors to operate on
+   * the results of any query in a consistent way. This is useful for the SQL layer and for any algorithm that might
+   * operate on the results of an inner query.
+   *
+   * Not all query types support this method. They will throw {@link UnsupportedOperationException}, and they cannot
+   * be used by the SQL layer or by generic higher-level algorithms. The base implementation here always throws.
+   *
+   * Some query types return less information after translating their results into arrays, especially in situations
+   * where there is no clear way to translate fully rich results into flat arrays. For example, the scan query does not
+   * include the segmentId in its array-based results, because it could potentially conflict with a 'segmentId' field
+   * in the actual datasource being scanned.
+   *
+   * It is possible that there will be multiple arrays returned for a single result object. For example, in the topN
+   * query, each {@link org.apache.druid.query.topn.TopNResultValue} will generate a separate array for each of its
+   * {@code values}.
+   *
+   * By convention, the array form should include the __time column, if present, as a long (milliseconds since epoch).
+   *
+   * @param query same query passed to {@link #resultArrayFields}
+   * @param resultSequence results of the form returned by {@link #mergeResults}
+   *
+   * @return results in array form
+   * @throws UnsupportedOperationException if this query type does not support returning results as arrays
+   */
+  public Sequence<Object[]> resultsAsArrays(QueryType query, Sequence<ResultType> resultSequence)
+  {
+    throw new UOE("Query type '%s' does not support returning results as arrays", query.getType());
+  }
 }
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java
@@ -677,6 +677,17 @@ public ResultRow apply(Object input)
     };
   }
 
+  @Override
+  public List<String> resultArrayFields(final GroupByQuery query)
+  {
+    return query.getResultRowOrder(); // field order is whatever the query reports as its result-row order
+  }
+
+  @Override
+  public Sequence<Object[]> resultsAsArrays(final GroupByQuery query, final Sequence<ResultRow> resultSequence)
+  {
+    return resultSequence.map(ResultRow::getArray); // rows pass through via getArray(); 'query' is not consulted
+  }
 
   /**
    * This function checks the query for dimensions which can be optimized by applying the dimension extraction

diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryEngine.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryEngine.java
@@ -56,7 +56,7 @@
 
 public class ScanQueryEngine
 {
-  private static final String LEGACY_TIMESTAMP_KEY = "timestamp";
+  static final String LEGACY_TIMESTAMP_KEY = "timestamp";
 
   public Sequence<ScanResultValue> process(
       final ScanQuery query,
@@ -202,9 +202,9 @@ public void remove()
                             throw new UnsupportedOperationException();
                           }
 
-                          private List<Object> rowsToCompactedList()
+                          private List<List<Object>> rowsToCompactedList()
                           {
-                            final List<Object> events = new ArrayList<>(batchSize);
+                            final List<List<Object>> events = new ArrayList<>(batchSize);
                             final long iterLimit = Math.min(limit, offset + batchSize);
                             for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                               final List<Object> theEvent = new ArrayList<>(allColumns.size());

diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java
@@ -22,9 +22,14 @@
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.google.common.base.Function;
 import com.google.common.base.Functions;
+import com.google.common.collect.Iterables;
 import com.google.inject.Inject;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.UOE;
 import org.apache.druid.java.util.common.guava.BaseSequence;
 import org.apache.druid.java.util.common.guava.CloseQuietly;
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.guava.Sequences;
 import org.apache.druid.query.GenericQueryMetricsFactory;
 import org.apache.druid.query.Query;
 import org.apache.druid.query.QueryMetrics;
@@ -33,6 +38,11 @@
 import org.apache.druid.query.QueryToolChest;
 import org.apache.druid.query.aggregation.MetricManipulationFn;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
 public class ScanQueryQueryToolChest extends QueryToolChest<ScanResultValue, ScanQuery>
 {
   private static final TypeReference<ScanResultValue> TYPE_REFERENCE = new TypeReference<ScanResultValue>()
@@ -115,4 +125,69 @@ public QueryRunner<ScanResultValue> preMergeQueryDecoration(final QueryRunner<Sc
       return runner.run(queryPlus, responseContext);
     };
   }
+
+  @Override
+  public List<String> resultArrayFields(final ScanQuery query)
+  {
+    if (query.getColumns() == null || query.getColumns().isEmpty()) {
+      // Note: if no specific list of columns is provided, then since we can't predict what columns will come back, we
+      // unfortunately can't do array-based results. In this case, there is a major difference between standard and
+      // array-based results: the standard results will detect and return _all_ columns, whereas the array-based results
+      // will include none of them.
+      return Collections.emptyList();
+    } else if (query.withNonNullLegacy(scanQueryConfig).isLegacy()) {
+      final List<String> retVal = new ArrayList<>();
+      retVal.add(ScanQueryEngine.LEGACY_TIMESTAMP_KEY); // legacy mode: the timestamp key leads the field list
+      retVal.addAll(query.getColumns()); // followed by the requested columns, in their given order
+      return retVal;
+    } else {
+      return query.getColumns(); // non-legacy: the query's own column list, returned as-is (not copied)
+    }
+  }
+
+  @Override
+  public Sequence<Object[]> resultsAsArrays(final ScanQuery query, final Sequence<ScanResultValue> resultSequence)
+  {
+    final List<String> fields = resultArrayFields(query);
+    final Function<?, Object[]> mapper; // row type differs per result format, hence the wildcard
+
+    switch (query.getResultFormat()) {
+      case RESULT_FORMAT_LIST:
+        mapper = (Map<String, Object> row) -> {
+          final Object[] rowArray = new Object[fields.size()];
+
+          for (int i = 0; i < fields.size(); i++) {
+            rowArray[i] = row.get(fields.get(i)); // rows are keyed by field name in this format
+          }
+
+          return rowArray;
+        };
+        break;
+      case RESULT_FORMAT_COMPACTED_LIST:
+        mapper = (List<Object> row) -> {
+          if (row.size() == fields.size()) {
+            return row.toArray(); // sizes agree, so the row is used positionally as-is
+          } else if (fields.isEmpty()) {
+            return new Object[0]; // no expected fields: emit an empty array whatever the row holds
+          } else {
+            // Uh oh... mismatch in expected and actual field count. I don't think this should happen, so let's
+            // throw an exception. If this really does happen, and there's a good reason for it, then we should remap
+            // the result row here.
+            throw new ISE("Mismatch in expected[%d] vs actual[%s] field count", fields.size(), row.size());
+          }
+        };
+        break;
+      default:
+        throw new UOE("Unsupported resultFormat for array-based results: %s", query.getResultFormat());
+    }
+
+    return resultSequence.flatMap(
+        result -> {
+          // Generics? Where we're going, we don't need generics.
+          final List rows = (List) result.getEvents(); // raw: element type varies with resultFormat
+          final Iterable arrays = Iterables.transform(rows, (Function) mapper); // one array per event row
+          return Sequences.simple(arrays);
+        }
+    );
+  }
 }
diff --git a/...essing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/...essing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java
@@ -50,8 +50,10 @@
 import org.apache.druid.query.cache.CacheKeyBuilder;
 import org.apache.druid.query.context.ResponseContext;
 import org.apache.druid.query.groupby.RowBasedColumnSelectorFactory;
+import org.apache.druid.segment.column.ColumnHolder;
 import org.joda.time.DateTime;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -402,6 +404,47 @@ public Function<Result<TimeseriesResultValue>, Result<TimeseriesResultValue>> ma
     return makeComputeManipulatorFn(query, fn, true);
   }
 
+  @Override
+  public List<String> resultArrayFields(TimeseriesQuery query)
+  {
+    final List<String> fields = new ArrayList<>( // presized: __time + aggregators + post-aggregators
+        1 + query.getAggregatorSpecs().size() + query.getPostAggregatorSpecs().size()
+    );
+
+    fields.add(ColumnHolder.TIME_COLUMN_NAME); // time column is always the first field
+    query.getAggregatorSpecs().stream().map(AggregatorFactory::getName).forEach(fields::add);
+    query.getPostAggregatorSpecs().stream().map(PostAggregator::getName).forEach(fields::add);
+
+    return fields;
+  }
+
+  @Override
+  public Sequence<Object[]> resultsAsArrays(
+      final TimeseriesQuery query,
+      final Sequence<Result<TimeseriesResultValue>> resultSequence
+  )
+  {
+    final List<String> fields = resultArrayFields(query); // computed once, shared by every row
+
+    return Sequences.map(
+        resultSequence,
+        result -> {
+          final Object[] retVal = new Object[fields.size()];
+
+          // Position 0 is always __time, taken from the result timestamp via getMillis().
+          retVal[0] = result.getTimestamp().getMillis();
+
+          // Remaining fields are read by name from the result's base map, starting at index 1.
+          final Map<String, Object> resultMap = result.getValue().getBaseObject();
+          for (int i = 1; i < fields.size(); i++) {
+            retVal[i] = resultMap.get(fields.get(i));
+          }
+
+          return retVal;
+        }
+    );
+  }
+
   private Function<Result<TimeseriesResultValue>, Result<TimeseriesResultValue>> makeComputeManipulatorFn(
       final TimeseriesQuery query,
       final MetricManipulationFn fn,

diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java
@@ -49,8 +49,10 @@
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.segment.DimensionHandlerUtils;
+import org.apache.druid.segment.column.ColumnHolder;
 import org.joda.time.DateTime;
 
+import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
@@ -526,6 +528,53 @@ public DimensionAndMetricValueExtractor apply(
     };
   }
 
+  @Override
+  public List<String> resultArrayFields(TopNQuery query)
+  {
+    final List<String> fields = new ArrayList<>( // presized: __time + dimension + aggregators + post-aggregators
+        2 + query.getAggregatorSpecs().size() + query.getPostAggregatorSpecs().size()
+    );
+
+    fields.add(ColumnHolder.TIME_COLUMN_NAME); // time column is always the first field
+    fields.add(query.getDimensionSpec().getOutputName()); // then the dimension, under its output name
+    query.getAggregatorSpecs().stream().map(AggregatorFactory::getName).forEach(fields::add);
+    query.getPostAggregatorSpecs().stream().map(PostAggregator::getName).forEach(fields::add);
+
+    return fields;
+  }
+
+  @Override
+  public Sequence<Object[]> resultsAsArrays(TopNQuery query, Sequence<Result<TopNResultValue>> resultSequence)
+  {
+    final List<String> fields = resultArrayFields(query); // computed once, shared by every row
+
+    return resultSequence.flatMap(
+        result -> {
+          final List<DimensionAndMetricValueExtractor> rows = result.getValue().getValue();
+
+          return Sequences.simple(
+              Iterables.transform(
+                  rows,
+                  row -> {
+                    final Object[] retVal = new Object[fields.size()];
+
+                    // Position 0 is always __time; each row gets the timestamp of its enclosing Result.
+                    retVal[0] = result.getTimestamp().getMillis();
+
+                    // Remaining fields are read by name from the row's base map, starting at index 1.
+                    final Map<String, Object> resultMap = row.getBaseObject();
+                    for (int i = 1; i < fields.size(); i++) {
+                      retVal[i] = resultMap.get(fields.get(i));
+                    }
+
+                    return retVal;
+                  }
+              )
+          );
+        }
+    );
+  }
+
   static class ThresholdAdjustingQueryRunner implements QueryRunner<Result<TopNResultValue>>
   {
     private final QueryRunner<Result<TopNResultValue>> runner;

diff --git a/processing/src/test/java/org/apache/druid/query/QueryToolChestTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryToolChestTestHelper.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query;
+
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.junit.Assert;
+
+import java.util.List;
+
+public class QueryToolChestTestHelper
+{
+  public static void assertArrayResultsEquals(final List<Object[]> expected, final Sequence<Object[]> actual)
+  {
+    final List<Object[]> actualList = actual.toList(); // materialize so results can be indexed
+    Assert.assertEquals("number of results", expected.size(), actualList.size()); // sizes first, then rows
+    for (int i = 0; i < actualList.size(); i++) {
+      Assert.assertArrayEquals("result #" + i, expected.get(i), actualList.get(i)); // row-by-row comparison
+    }
+  }
+}