Refactor index merging, replace Rowboats with RowIterators and RowPointers #5335

Merged · 27 commits · Apr 28, 2018

Commits
3e252e9
Refactor index merging, replace Rowboats with RowIterators and RowPoi…
leventov Feb 2, 2018
86e1c09
Add javadocs
leventov Feb 3, 2018
28ba4b5
Fix a bug in QueryableIndexIndexableAdapter
leventov Feb 3, 2018
6e66482
Fixes
leventov Feb 3, 2018
590b110
Remove unused declarations
leventov Feb 5, 2018
eb158ca
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Feb 9, 2018
f51ffde
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Feb 21, 2018
77c02fc
Remove unused GenericColumn.isNull() method
leventov Feb 21, 2018
f7e9a26
Merge branch 'master' of github.com:druid-io/druid into index-merge-n…
leventov Feb 27, 2018
111cb86
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Mar 1, 2018
625b7a2
Fix test
leventov Mar 1, 2018
a94f874
Address comments
leventov Mar 1, 2018
ec894f6
Rearrange some code in MergingRowIterator for more clarity
leventov Mar 2, 2018
913de3a
Self-review
leventov Mar 2, 2018
23d9a83
Fix style
leventov Mar 2, 2018
c8ae34a
Improve docs
leventov Mar 9, 2018
5b667ce
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Mar 13, 2018
3b9670a
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Mar 19, 2018
ad2c2f5
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Mar 21, 2018
ac448b4
Fix docs
leventov Mar 21, 2018
70c1c98
Rename IndexMergerV9.writeDimValueAndSetupDimConversion to setUpDimCo…
leventov Mar 21, 2018
e1434fc
Update Javadocs
leventov Mar 23, 2018
56b2a5f
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Apr 6, 2018
28ee32b
Merge remote-tracking branch 'upstream/master' into index-merge-no-ga…
leventov Apr 9, 2018
15d1c35
Minor fixes
leventov Apr 9, 2018
cfec46a
Doc fixes, more code comments, cleanup of RowCombiningTimeAndDimsIter…
leventov Apr 9, 2018
99c7e8a
Fix doc link
leventov Apr 9, 2018
api/src/main/java/io/druid/data/input/Row.java (2 changes: 1 addition, 1 deletion)
@@ -65,7 +65,7 @@ public interface Row extends Comparable<Row>

   /**
    * Returns the raw dimension value for the given column name. This is different from {@link #getDimension} which
-   * all values to strings before returning them.
+   * converts all values to strings before returning them.
    *
    * @param dimension the column name of the dimension requested
    *
@@ -19,8 +19,6 @@

 package io.druid.benchmark;

-// Run FloatCompressionBenchmarkFileGenerator to generate the required files before running this benchmark
-
 import com.google.common.base.Supplier;
 import com.google.common.io.Files;
 import io.druid.segment.data.ColumnarFloats;
@@ -44,6 +44,9 @@
 import java.util.Random;
 import java.util.concurrent.TimeUnit;

+/**
+ * Run {@link FloatCompressionBenchmarkFileGenerator} to generate the required files before running this benchmark
+ */
 @State(Scope.Benchmark)
 @Fork(value = 1)
 @Warmup(iterations = 10)
@@ -42,8 +42,9 @@
 import java.util.Random;
 import java.util.concurrent.TimeUnit;

-// Run LongCompressionBenchmarkFileGenerator to generate the required files before running this benchmark
-
+/**
+ * Run {@link LongCompressionBenchmarkFileGenerator} to generate the required files before running this benchmark
+ */
 @State(Scope.Benchmark)
 @Fork(value = 1)
 @Warmup(iterations = 10)
@@ -45,7 +45,7 @@
 import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
 import io.druid.segment.QueryableIndex;
 import io.druid.segment.QueryableIndexIndexableAdapter;
-import io.druid.segment.Rowboat;
+import io.druid.segment.RowIterator;
 import io.druid.segment.indexing.DataSchema;
 import io.druid.segment.indexing.granularity.UniformGranularitySpec;
 import io.druid.timeline.DataSegment;
@@ -309,11 +309,11 @@ private void verifyJob(IndexGeneratorJob job) throws IOException
       QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
       QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);

-      for (Rowboat row : adapter.getRows()) {
-        Object[] metrics = row.getMetrics();
-
-        rowCount++;
-        Assert.assertTrue(metrics.length == 2);
+      try (RowIterator rowIt = adapter.getRows()) {
+        while (rowIt.moveToNext()) {
+          rowCount++;
+          Assert.assertEquals(2, rowIt.getPointer().getNumMetrics());
+        }
       }
     }
     Assert.assertEquals(rowCount, data.size());
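The hunk above is the core pattern of this PR: instead of materializing a Rowboat object per row, callers walk a closeable RowIterator and read the current row through its RowPointer. A minimal standalone sketch, assuming only what this diff shows (IndexableAdapter#getRows() returning a RowIterator, moveToNext(), getPointer(), getNumMetrics()) and that the pointer is a reused view onto the current row rather than a per-row allocation, which appears to be the point of dropping Rowboat:

```java
import io.druid.segment.IndexableAdapter;
import io.druid.segment.RowIterator;
import io.druid.segment.RowPointer;

import java.io.IOException;

final class RowIterationSketch
{
  /** Counts rows with exactly two metrics, in the style of the updated test. */
  static int countTwoMetricRows(IndexableAdapter adapter) throws IOException
  {
    int count = 0;
    // The iterator starts positioned before the first row; moveToNext() both
    // advances and reports whether another row exists.
    try (RowIterator rowIterator = adapter.getRows()) {
      while (rowIterator.moveToNext()) {
        RowPointer pointer = rowIterator.getPointer();
        // Read from the pointer before the next moveToNext() call: it is a
        // view onto the iterator's current position, not a snapshot.
        if (pointer.getNumMetrics() == 2) {
          count++;
        }
      }
    }
    return count;
  }
}
```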
@@ -254,8 +254,7 @@ public void buildingSketchesAtIngestionTime() throws Exception
     double[] histogram = (double[]) histogramObject;
     Assert.assertEquals(4, histogram.length);
     for (final double bin : histogram) {
-      Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly
-      // distributed into 4 bins
+      Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly distributed into 4 bins
     }
   }

@@ -306,14 +306,7 @@ public static final SerializeResult toBytes(
     }

     //writing all metrics
-    Supplier<InputRow> supplier = new Supplier<InputRow>()
-    {
-      @Override
-      public InputRow get()
-      {
-        return row;
-      }
-    };
+    Supplier<InputRow> supplier = () -> row;
     WritableUtils.writeVInt(out, aggs.length);
     for (AggregatorFactory aggFactory : aggs) {
       String k = aggFactory.getName();
@@ -23,7 +23,6 @@
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
-import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import io.druid.indexing.common.TaskToolbox;
@@ -35,8 +34,8 @@
 import io.druid.segment.IndexSpec;
 import io.druid.segment.IndexableAdapter;
 import io.druid.segment.QueryableIndexIndexableAdapter;
-import io.druid.segment.Rowboat;
-import io.druid.segment.RowboatFilteringIndexAdapter;
+import io.druid.segment.RowFilteringIndexAdapter;
+import io.druid.segment.RowPointer;
 import io.druid.timeline.DataSegment;
 import io.druid.timeline.TimelineObjectHolder;
 import io.druid.timeline.VersionedIntervalTimeline;
@@ -120,18 +119,9 @@ public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput)
     List<IndexableAdapter> adapters = Lists.newArrayList();
     for (final SegmentToMergeHolder holder : segmentsToMerge) {
       adapters.add(
-          new RowboatFilteringIndexAdapter(
-              new QueryableIndexIndexableAdapter(
-                  toolbox.getIndexIO().loadIndex(holder.getFile())
-              ),
-              new Predicate<Rowboat>()
-              {
-                @Override
-                public boolean apply(Rowboat input)
-                {
-                  return holder.getInterval().contains(input.getTimestamp());
-                }
-              }
+          new RowFilteringIndexAdapter(
+              new QueryableIndexIndexableAdapter(toolbox.getIndexIO().loadIndex(holder.getFile())),
+              (RowPointer rowPointer) -> holder.getInterval().contains(rowPointer.getTimestamp())
           )
       );
     }
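Pulled out of the task for readability, the same construction as a standalone sketch, assuming (as the diff suggests) that RowFilteringIndexAdapter accepts any IndexableAdapter plus a predicate over RowPointer:

```java
import io.druid.segment.IndexableAdapter;
import io.druid.segment.RowFilteringIndexAdapter;
import io.druid.segment.RowPointer;
import org.joda.time.Interval;

final class IntervalFilteringSketch
{
  /**
   * Wraps an adapter so that downstream merging only sees rows whose
   * timestamps fall inside the given interval; the lambda replaces the old
   * Predicate<Rowboat> anonymous class.
   */
  static IndexableAdapter filteredToInterval(IndexableAdapter adapter, Interval interval)
  {
    return new RowFilteringIndexAdapter(
        adapter,
        (RowPointer rowPointer) -> interval.contains(rowPointer.getTimestamp())
    );
  }
}
```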
@@ -70,7 +70,6 @@ public abstract class AggregatorFactory implements Cacheable
    * @see AggregateCombiner
    * @see io.druid.segment.IndexMerger
    */
-  @SuppressWarnings("unused") // Going to be used when https://github.com/druid-io/druid/projects/2 is complete
   public AggregateCombiner makeAggregateCombiner()
   {
     throw new UOE("[%s] does not implement makeAggregateCombiner()", this.getClass().getName());
@@ -28,6 +28,12 @@
 import javax.annotation.Nullable;

 /**
+ * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension).
+ * Note that ExtractionFn implementations are expected to be Threadsafe.
+ *
+ * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a
+ * regular expression with a capture group. When the regular expression matches the value of a dimension,
+ * the value captured by the group is used for grouping operations instead of the dimension value.
  */
 @ExtensionPoint
 @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
@@ -48,14 +54,6 @@
   @JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class),
   @JsonSubTypes.Type(name = "strlen", value = StrlenExtractionFn.class)
 })
-/**
- * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension).
- * Note that ExtractionFn implementations are expected to be Threadsafe.
- *
- * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a
- * regular expression with a capture group. When the regular expression matches the value of a dimension,
- * the value captured by the group is used for grouping operations instead of the dimension value.
- */
 public interface ExtractionFn extends Cacheable
 {
   /**
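To make the relocated javadoc's RegexDimExtractionFn example concrete, here is the capture-group idea in plain java.util.regex; this illustrates the concept only and is not the Druid class itself:

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

final class CaptureGroupSketch
{
  /**
   * When the pattern matches, the text captured by group 1 stands in for the
   * raw dimension value; otherwise the value passes through unchanged.
   */
  static String extract(String dimensionValue, Pattern pattern)
  {
    Matcher matcher = pattern.matcher(dimensionValue);
    return matcher.find() ? matcher.group(1) : dimensionValue;
  }

  public static void main(String[] args)
  {
    Pattern firstPathSegment = Pattern.compile("^/([^/]+)/");
    // Prints "wiki": "/wiki/Main_Page" groups under "wiki" rather than the full path.
    System.out.println(extract("/wiki/Main_Page", firstPathSegment));
  }
}
```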
@@ -211,6 +211,7 @@ public VirtualColumns getVirtualColumns()
     return virtualColumns;
   }

+  @Nullable
   @JsonProperty("filter")
   public DimFilter getDimFilter()
   {
@@ -37,4 +37,5 @@
 public interface ColumnValueSelector<T> extends BaseLongColumnValueSelector, BaseDoubleColumnValueSelector,
     BaseFloatColumnValueSelector, BaseObjectColumnValueSelector<T>
 {
+  ColumnValueSelector[] EMPTY_ARRAY = new ColumnValueSelector[0];
 }
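The new constant is the standard allocation-free idiom for Collection.toArray. A sketch of the likely intended use at call sites; the call site itself is an assumption, not something shown in this diff:

```java
import io.druid.segment.ColumnValueSelector;

import java.util.List;

final class EmptyArraySketch
{
  static ColumnValueSelector[] toArray(List<ColumnValueSelector> selectors)
  {
    // Passing a shared zero-length array lets the JDK allocate the correctly
    // sized result without a throwaway sizing argument on every call.
    return selectors.toArray(ColumnValueSelector.EMPTY_ARRAY);
  }
}
```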
processing/src/main/java/io/druid/segment/Cursor.java (9 changes: 8 additions, 1 deletion)
@@ -22,8 +22,15 @@
 import org.joda.time.DateTime;

 /**
+ * Cursor is an interface for iteration over a range of data points, used during query execution. {@link
+ * QueryableIndexStorageAdapter.QueryableIndexCursor} is an implementation for historical segments, and {@link
+ * io.druid.segment.incremental.IncrementalIndexStorageAdapter.IncrementalIndexCursor} is an implementation for {@link
+ * io.druid.segment.incremental.IncrementalIndex}.
+ *
+ * Cursor is conceptually similar to {@link TimeAndDimsPointer}, but the latter is used for historical segment creation
+ * rather than query execution (as Cursor). If those abstractions could be collapsed (and if it is worthwhile) is yet to
+ * be determined.
  */
-
 public interface Cursor
 {
   ColumnSelectorFactory getColumnSelectorFactory();
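For contrast with TimeAndDimsPointer, the query-time loop the new javadoc describes looks roughly like the following. This is a sketch assuming Cursor's usual advance()/isDone() methods and a makeColumnValueSelector factory method on ColumnSelectorFactory, neither of which appears in this hunk:

```java
import io.druid.segment.ColumnSelectorFactory;
import io.druid.segment.ColumnValueSelector;
import io.druid.segment.Cursor;

final class CursorScanSketch
{
  /** Sums a numeric column over every row in the cursor's range. */
  static long sumLongColumn(Cursor cursor, String columnName)
  {
    // Selectors are created once up front; advancing the cursor repositions
    // them onto the next row.
    ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
    ColumnValueSelector<?> selector = factory.makeColumnValueSelector(columnName);
    long sum = 0;
    while (!cursor.isDone()) {
      sum += selector.getLong();
      cursor.advance();
    }
    return sum;
  }
}
```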
processing/src/main/java/io/druid/segment/DimensionHandler.java (87 changes: 13 additions, 74 deletions)
@@ -20,12 +20,11 @@
 package io.druid.segment;

 import io.druid.data.input.impl.DimensionSchema.MultiValueHandling;
-import io.druid.segment.column.Column;
 import io.druid.segment.column.ColumnCapabilities;
-import io.druid.segment.data.Indexed;
+import io.druid.segment.selector.settable.SettableColumnValueSelector;
 import io.druid.segment.writeout.SegmentWriteOutMedium;

-import java.io.Closeable;
+import java.util.Comparator;

 /**
  * Processing related interface
@@ -40,10 +39,8 @@
  *
  * This interface allows type-specific behavior column logic, such as choice of indexing structures and disk formats.
  * to be contained within a type-specific set of handler objects, simplifying processing classes
- * such as IncrementalIndex and IndexMerger and allowing for abstracted development of additional dimension types.
- *
- * A dimension may have two representations, an encoded representation and a actual representation.
- * For example, a value for a String dimension has an integer dictionary encoding, and an actual String representation.
+ * such as {@link io.druid.segment.incremental.IncrementalIndex} and {@link IndexMerger} and allowing for abstracted
+ * development of additional dimension types.
  *
  * A DimensionHandler is a stateless object, and thus thread-safe; its methods should be pure functions.
  *
@@ -85,7 +82,6 @@ default MultiValueHandling getMultivalueHandling()
    */
   DimensionIndexer<EncodedType, EncodedKeyComponentType, ActualType> makeIndexer();

-
   /**
    * Creates a new DimensionMergerV9, a per-dimension object responsible for merging indexes/row data across segments
    * and building the on-disk representation of a dimension. For use with IndexMergerV9 only.
@@ -99,14 +95,13 @@

  * @return A new DimensionMergerV9 object.
  */
-  DimensionMergerV9<EncodedKeyComponentType> makeMerger(
+  DimensionMergerV9 makeMerger(
       IndexSpec indexSpec,
       SegmentWriteOutMedium segmentWriteOutMedium,
       ColumnCapabilities capabilities,
       ProgressIndicator progress
   );

-
   /**
    * Given an key component representing a single set of row value(s) for this dimension as an Object,
    * return the length of the key component after appropriate type-casting.
@@ -119,73 +114,17 @@ DimensionMergerV9<EncodedKeyComponentType> makeMerger(
    */
   int getLengthOfEncodedKeyComponent(EncodedKeyComponentType dimVals);

-
   /**
-   * Given two key components representing sorted encoded row value(s), return the result of their comparison.
-   *
-   * If the two key components have different lengths, the shorter component should be ordered first in the comparison.
-   *
-   * Otherwise, this function should iterate through the key components and return the comparison of the
-   * first difference.
-   *
-   * For dimensions that do not support multivalue rows, lhs and rhs can be compared directly.
-   *
-   * @param lhs key component from a row
-   * @param rhs key component from a row
-   *
-   * @return integer indicating comparison result of key components
-   */
-  int compareSortedEncodedKeyComponents(EncodedKeyComponentType lhs, EncodedKeyComponentType rhs);
-
-
-  /**
-   * Given two key components representing sorted encoded row value(s), check that the two key components
-   * have the same encoded values, or if the encoded values differ, that they translate into the same actual values,
-   * using the mappings provided by lhsEncodings and rhsEncodings (if applicable).
-   *
-   * If validation fails, this method should throw a SegmentValidationException.
-   *
-   * Used by IndexIO for validating segments.
-   *
-   * See StringDimensionHandler.validateSortedEncodedKeyComponents() for a reference implementation.
-   *
-   * @param lhs key component from a row
-   * @param rhs key component from a row
-   * @param lhsEncodings encoding lookup from lhs's segment, null if not applicable for this dimension's type
-   * @param rhsEncodings encoding lookup from rhs's segment, null if not applicable for this dimension's type
+   * Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type,
+   * corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares
+   * {@link ColumnValueSelector}s as {@link DimensionSelector}s.
    */
-  void validateSortedEncodedKeyComponents(
-      EncodedKeyComponentType lhs,
-      EncodedKeyComponentType rhs,
-      Indexed<ActualType> lhsEncodings,
-      Indexed<ActualType> rhsEncodings
-  ) throws SegmentValidationException;
-
+  Comparator<ColumnValueSelector> getEncodedValueSelectorComparator();

   /**
-   * Given a Column, return a type-specific object that can be used to retrieve row values.
-   *
-   * For example:
-   * - A String-typed implementation would return the result of column.getDictionaryEncoding()
-   * - A long-typed implemention would return the result of column.getGenericColumn().
-   *
-   * @param column Column for this dimension from a QueryableIndex
-   * @return The type-specific column subobject for this dimension.
-   */
-  Closeable getSubColumn(Column column);
-
-
-  /**
-   * Given a subcolumn from getSubColumn, and the index of the current row, retrieve a dimension's values
-   * from a row as an EncodedKeyComponentType.
-   *
-   * For example:
-   * - A String-typed implementation would read the current row from a DictionaryEncodedColumn as an int[].
-   * - A long-typed implemention would read the current row from a GenericColumn and return a Long.
-   *
-   * @param column Column for this dimension from a QueryableIndex
-   * @param currRow The index of the row to retrieve
-   * @return The key component for this dimension from the current row of the column.
+   * Creates and returns a new object of some implementation of {@link SettableColumnValueSelector}, that corresponds
+   * to the type of this DimensionHandler. E. g. {@link LongDimensionHandler} returns {@link
+   * io.druid.segment.selector.settable.SettableLongColumnValueSelector}, etc.
    */
-  EncodedKeyComponentType getEncodedKeyComponentFromColumn(Closeable column, int currRow);
+  SettableColumnValueSelector makeNewSettableEncodedValueSelector();
 }
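Taken together, the two replacement methods let merging code stay type-agnostic: a handler hands out settable selectors for snapshotting row values, plus a comparator that knows how to order them. A sketch under the assumption that SettableColumnValueSelector can copy the current value of another selector; the setValueFrom method name is hypothetical here, not confirmed by this diff:

```java
import io.druid.segment.ColumnValueSelector;
import io.druid.segment.DimensionHandler;
import io.druid.segment.selector.settable.SettableColumnValueSelector;

final class DimensionComparatorSketch
{
  /**
   * Snapshots the current row value from each side and compares them with the
   * handler's type-aware comparator, so the caller needs no per-type branching.
   */
  static int compareCurrentRows(
      DimensionHandler<?, ?, ?> handler,
      ColumnValueSelector leftRow,
      ColumnValueSelector rightRow
  )
  {
    SettableColumnValueSelector left = handler.makeNewSettableEncodedValueSelector();
    SettableColumnValueSelector right = handler.makeNewSettableEncodedValueSelector();
    left.setValueFrom(leftRow);   // hypothetical copy method, see lead-in
    right.setValueFrom(rightRow);
    return handler.getEncodedValueSelectorComparator().compare(left, right);
  }
}
```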