From b4104e73ad37dee96e29e345b958412791ab9079 Mon Sep 17 00:00:00 2001 From: Padma Penumarthy Date: Mon, 5 Feb 2018 21:41:45 -0800 Subject: [PATCH] DRILL-6138: Move RecordBatchSizer to org.apache.drill.exec.record package --- .../impl/aggregate/HashAggTemplate.java | 2 +- .../impl/flatten/FlattenRecordBatch.java | 2 +- .../physical/impl/xsort/managed/SortImpl.java | 2 +- .../rowSet/model/single/VectorAllocator.java | 5 ++--- .../spill => record}/RecordBatchSizer.java | 22 +++++++------------ .../impl/xsort/managed/TestShortArrays.java | 6 ++--- .../physical/unit/TestOutputBatchSize.java | 2 +- .../apache/drill/test/DrillTestWrapper.java | 3 +-- .../test/rowSet/AbstractSingleRowSet.java | 2 +- .../drill/test/rowSet/IndirectRowSet.java | 2 +- 10 files changed, 20 insertions(+), 28 deletions(-) rename exec/java-exec/src/main/java/org/apache/drill/exec/{physical/impl/spill => record}/RecordBatchSizer.java (96%) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java index ef8d9d90ba5..4c540801697 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java @@ -53,7 +53,7 @@ import org.apache.drill.exec.physical.impl.common.HashTableStats; import org.apache.drill.exec.physical.impl.common.IndexPointer; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.physical.impl.spill.SpillSet; import org.apache.drill.exec.planner.physical.AggPrelBase; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java index 9483f295678..5f693cb649e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java @@ -41,7 +41,7 @@ import org.apache.drill.exec.expr.ValueVectorWriteExpression; import org.apache.drill.exec.ops.FragmentContext; import org.apache.drill.exec.physical.config.FlattenPOP; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.record.AbstractSingleRecordBatch; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; import org.apache.drill.exec.record.MaterializedField; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/SortImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/SortImpl.java index 9fb478e497a..23ace36415e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/SortImpl.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/SortImpl.java @@ -22,7 +22,7 @@ import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.ops.OperatorContext; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.physical.impl.xsort.MSortTemplate; import org.apache.drill.exec.physical.impl.xsort.managed.BatchGroup.InputBatch; import org.apache.drill.exec.physical.impl.xsort.managed.SortMemoryManager.MergeTask; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java index 34a69606d42..e29a5cb06d3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java @@ -20,7 +20,6 @@ import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataCreator; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval; @@ -38,8 +37,8 @@ * row count and the size information provided in column metadata. *

* @see {@link AllocationHelper} - the class which this one replaces - * @see {@link VectorInitializer} - an earlier cut at implementation - * based on data from the {@link RecordBatchSizer} + * @see {@link org.apache.drill.exec.record.VectorInitializer} - an earlier cut at implementation + * based on data from the {@link org.apache.drill.exec.record.RecordBatchSizer} */ // TODO: Does not yet handle lists; lists are a simple extension diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/spill/RecordBatchSizer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java similarity index 96% rename from exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/spill/RecordBatchSizer.java rename to exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java index 0fe67d5a3a0..f5c77ce55ac 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/spill/RecordBatchSizer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java @@ -15,23 +15,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.exec.physical.impl.spill; +package org.apache.drill.exec.record; -import java.util.ArrayList; -import java.util.List; import java.util.Set; +import java.util.Map; +import org.apache.drill.common.map.CaseInsensitiveMap; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.expr.TypeHelper; import org.apache.drill.exec.memory.AllocationManager.BufferLedger; import org.apache.drill.exec.memory.BaseAllocator; -import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.RecordBatch; -import org.apache.drill.exec.record.VectorInitializer; -import org.apache.drill.exec.record.VectorAccessible; -import org.apache.drill.exec.record.VectorWrapper; import org.apache.drill.exec.record.selection.SelectionVector2; import org.apache.drill.exec.vector.UInt4Vector; import org.apache.drill.exec.vector.ValueVector; @@ -260,7 +254,7 @@ public static ColumnSize getColumn(ValueVector v, String prefix) { public static final int MAX_VECTOR_SIZE = ValueVector.MAX_BUFFER_SIZE; // 16 MiB - private List columnSizes = new ArrayList<>(); + private Map columnSizes = CaseInsensitiveMap.newHashMap(); /** * Number of records (rows) in the batch. @@ -394,7 +388,7 @@ private int roundUpToPowerOf2(int arg) { private void measureColumn(ValueVector v, String prefix) { ColumnSize colSize = new ColumnSize(v, prefix); - columnSizes.add(colSize); + columnSizes.put(v.getField().getName(), colSize); stdRowWidth += colSize.stdSize; netBatchSize += colSize.dataSize; maxSize = Math.max(maxSize, colSize.dataSize); @@ -458,7 +452,7 @@ public static int safeDivide(long num, long denom) { public int stdRowWidth() { return stdRowWidth; } public int grossRowWidth() { return grossRowWidth; } public int netRowWidth() { return netRowWidth; } - public List columns() { return columnSizes; } + public Map columns() { return columnSizes; } /** * Compute the "real" width of the row, taking into account each varchar column size @@ -477,7 +471,7 @@ public static int safeDivide(long num, long denom) { public String toString() { StringBuilder buf = new StringBuilder(); buf.append("Actual batch schema & sizes {\n"); - for (ColumnSize colSize : columnSizes) { + for (ColumnSize colSize : columnSizes.values()) { buf.append(" "); buf.append(colSize.toString()); buf.append("\n"); @@ -508,7 +502,7 @@ public String toString() { public VectorInitializer buildVectorInitializer() { VectorInitializer initializer = new VectorInitializer(); - for (ColumnSize colSize : columnSizes) { + for (ColumnSize colSize : columnSizes.values()) { colSize.buildVectorInitializer(initializer); } return initializer; diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java index 38e3698ecd0..3c210f783be 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java @@ -23,8 +23,8 @@ import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer.ColumnSize; +import org.apache.drill.exec.record.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer.ColumnSize; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.VectorInitializer; import org.apache.drill.exec.record.VectorInitializer.AllocationHint; @@ -71,7 +71,7 @@ public void testSizer() { RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); assertEquals(2, sizer.columns().size()); - ColumnSize bCol = sizer.columns().get(1); + ColumnSize bCol = sizer.columns().get("b"); assertEquals(0.1, bCol.estElementCountPerArray, 0.01); assertEquals(1, bCol.elementCount); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java index 4a1dc8fa734..9a4633d27cd 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java @@ -26,7 +26,7 @@ import org.apache.drill.exec.physical.base.PhysicalOperator; import org.apache.drill.exec.physical.config.FlattenPOP; import org.apache.drill.exec.physical.impl.ScanBatch; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.record.RecordBatch; import org.apache.drill.exec.record.VectorAccessible; import org.apache.drill.exec.util.JsonStringArrayList; diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java index c470b0dd7de..78e32eead7d 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java @@ -40,8 +40,7 @@ import org.apache.drill.exec.HyperVectorValueIterator; import org.apache.drill.exec.exception.SchemaChangeException; import org.apache.drill.exec.memory.BufferAllocator; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; -import org.apache.drill.exec.physical.unit.PhysicalOpUnitTestBase; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.proto.UserBitShared; import org.apache.drill.exec.proto.UserBitShared.QueryType; import org.apache.drill.exec.record.BatchSchema; diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java index e1e18dc9803..ef41b3a0051 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java @@ -17,7 +17,7 @@ */ package org.apache.drill.test.rowSet; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.physical.rowSet.model.ReaderIndex; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval; import org.apache.drill.exec.physical.rowSet.model.single.BaseReaderBuilder; diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java index e729bba9aa1..a0ef6f0f05e 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java @@ -19,7 +19,7 @@ import org.apache.drill.exec.exception.OutOfMemoryException; import org.apache.drill.exec.memory.BufferAllocator; -import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.physical.rowSet.model.ReaderIndex; import org.apache.drill.exec.physical.rowSet.model.SchemaInference; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;