[SPARK-16060][SQL] Vectorized Orc reader #13775

Status: Closed. This PR wanted to merge 24 commits.

Commits (24):
2861ac2  Add vectorized Orc reader support. (viirya, Jun 16, 2016)
eee8eca  import. (viirya, Jun 17, 2016)
b753d09  If column is repeating, always using row id 0. (viirya, Jun 18, 2016)
7d26f5e  Fix bugs of getBinary and numFields. (viirya, Jun 19, 2016)
74fe936  Remove unnecessary change. (viirya, Jun 20, 2016)
7e7bb6c  Remove unnecessary change. (viirya, Jun 20, 2016)
20b832e  Add Apache license headers. (viirya, Jun 20, 2016)
855bcfd  Adjust exception. (viirya, Jun 22, 2016)
66ab632  Avoid creating String in getUTF8String. (viirya, Jun 23, 2016)
4c14278  Merge remote-tracking branch 'upstream/master' into vectorized-orc-re… (viirya, Jun 28, 2016)
b067658  Address comment. (viirya, Aug 10, 2016)
aabad7d  Merge remote-tracking branch 'upstream/master' into vectorized-orc-re… (viirya, Aug 11, 2016)
06066eb  Don't rely on progress to indicate last batch. (viirya, Aug 11, 2016)
ed780f6  Merge remote-tracking branch 'upstream/master' into vectorized-orc-re… (viirya, Oct 6, 2016)
7a47360  Address comments. (viirya, Nov 3, 2016)
3895a98  Address comments. (viirya, Nov 22, 2016)
255c02e  Merge remote-tracking branch 'upstream/master' into vectorized-orc-re… (viirya, Nov 22, 2016)
c24169d  Implement few newly added methods of InternalRow. (viirya, Nov 22, 2016)
c297678  Support return Spark ColumnarBatch. (viirya, Nov 23, 2016)
8638a0e  Add test for OrcColumnVector. (viirya, Nov 24, 2016)
55bb19f  Expand OrcQuerySuite to test vectorized Orc reader. (viirya, Nov 24, 2016)
160e924  Add test for VectorizedSparkOrcNewRecordReader. (viirya, Nov 25, 2016)
bd15842  Add partition column test. (viirya, Nov 25, 2016)
0ac61b7  Expand tests. (viirya, Nov 25, 2016)
Changes from all commits:

ColumnVector.java

```diff
@@ -588,7 +588,7 @@ public MapData getMap(int ordinal) {
   /**
    * Returns the decimal for rowId.
    */
-  public final Decimal getDecimal(int rowId, int precision, int scale) {
+  public Decimal getDecimal(int rowId, int precision, int scale) {
     if (precision <= Decimal.MAX_INT_DIGITS()) {
       return Decimal.createUnsafe(getInt(rowId), precision, scale);
     } else if (precision <= Decimal.MAX_LONG_DIGITS()) {
@@ -617,7 +617,7 @@ public final void putDecimal(int rowId, Decimal value, int precision) {
   /**
    * Returns the UTF8String for rowId.
    */
-  public final UTF8String getUTF8String(int rowId) {
+  public UTF8String getUTF8String(int rowId) {
     if (dictionary == null) {
       ColumnVector.Array a = getByteArray(rowId);
       return UTF8String.fromBytes(a.byteArray, a.byteArrayOffset, a.length);
@@ -630,7 +630,7 @@ public final UTF8String getUTF8String(int rowId) {
   /**
    * Returns the byte array for rowId.
    */
-  public final byte[] getBinary(int rowId) {
+  public byte[] getBinary(int rowId) {
     if (dictionary == null) {
       ColumnVector.Array array = getByteArray(rowId);
       byte[] bytes = new byte[array.length];
@@ -980,6 +980,14 @@ public ColumnVector getDictionaryIds() {
     return dictionaryIds;
   }
 
+  public ColumnVector(DataType type) {
+    this.capacity = 0;
+    this.type = type;
+    this.childColumns = null;
+    this.resultArray = null;
+    this.resultStruct = null;
+  }
+
   /**
    * Sets up the common state and also handles creating the child columns if this is a nested
    * type.
```
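The point of dropping `final` on `getDecimal`, `getUTF8String`, and `getBinary`, together with the new bare `ColumnVector(DataType)` constructor, is to let a file format serve these reads from its own buffers instead of copying into Spark-managed memory. The sketch below is hypothetical (it is not the PR's actual `OrcColumnVector`, whose code is not shown here): a read-only string column wrapping Hive's `BytesColumnVector`, overriding the newly virtual getters and applying the rule from commit b753d09 ("If column is repeating, always using row id 0"). The many remaining abstract members of `ColumnVector` are elided, so this is illustrative rather than compilable as-is.

```java
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.unsafe.types.UTF8String;

// Hypothetical sketch: a read-only string column that delegates to an ORC
// BytesColumnVector. The other abstract ColumnVector members are elided.
public class OrcStringColumnVector extends ColumnVector {
  private final BytesColumnVector data;

  public OrcStringColumnVector(BytesColumnVector data, DataType type) {
    super(type);  // the new lightweight constructor: no Spark-side allocation
    this.data = data;
  }

  @Override
  public UTF8String getUTF8String(int rowId) {
    // A repeating ORC vector stores a single value for all rows at index 0.
    int i = data.isRepeating ? 0 : rowId;
    return UTF8String.fromBytes(data.vector[i], data.start[i], data.length[i]);
  }

  @Override
  public byte[] getBinary(int rowId) {
    int i = data.isRepeating ? 0 : rowId;
    byte[] bytes = new byte[data.length[i]];
    System.arraycopy(data.vector[i], data.start[i], bytes, 0, bytes.length);
    return bytes;
  }
}
```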
ColumnarBatch.java

```diff
@@ -466,6 +466,18 @@ public void filterNullsInColumn(int ordinal) {
     nullFilteredColumns.add(ordinal);
   }
 
+  /**
+   * A public Ctor which accepts allocated ColumnVectors.
+   */
+  public ColumnarBatch(ColumnVector[] columns, int maxRows) {
+    this.columns = columns;
+    this.capacity = maxRows;
+    this.schema = null;
+    this.nullFilteredColumns = new HashSet<>();
+    this.filteredRows = new boolean[maxRows];
+    this.row = new Row(this);
+  }
+
   private ColumnarBatch(StructType schema, int maxRows, MemoryMode memMode) {
     this.schema = schema;
     this.capacity = maxRows;
```
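This new public constructor lets a reader hand pre-built vectors straight to a `ColumnarBatch` instead of going through the private schema-driven constructor. Note that it sets `schema` to null, so the caller keeps track of column types itself. A minimal usage sketch, assuming the on-heap `ColumnVector.allocate` factory from the same package; the two column types are arbitrary examples:

```java
import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.execution.vectorized.ColumnVector;
import org.apache.spark.sql.execution.vectorized.ColumnarBatch;
import org.apache.spark.sql.types.DataTypes;

// Sketch: the caller owns allocation and row bookkeeping; no schema attached.
int maxRows = 4096;
ColumnVector[] columns = new ColumnVector[] {
  ColumnVector.allocate(maxRows, DataTypes.IntegerType, MemoryMode.ON_HEAP),
  ColumnVector.allocate(maxRows, DataTypes.StringType, MemoryMode.ON_HEAP)
};

ColumnarBatch batch = new ColumnarBatch(columns, maxRows);
batch.setNumRows(0);  // update after filling the vectors on each read
```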
SQLConf.scala

```diff
@@ -248,6 +248,12 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val ORC_VECTORIZED_READER_ENABLED =
+    SQLConfigBuilder("spark.sql.orc.enableVectorizedReader")
+      .doc("Enables vectorized orc reader.")
+      .booleanConf
+      .createWithDefault(false)
+
   val ORC_FILTER_PUSHDOWN_ENABLED = SQLConfigBuilder("spark.sql.orc.filterPushdown")
     .doc("When true, enable filter pushdown for ORC files.")
     .booleanConf
@@ -692,6 +698,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   def parquetVectorizedReaderEnabled: Boolean = getConf(PARQUET_VECTORIZED_READER_ENABLED)
 
+  def orcVectorizedReaderEnabled: Boolean = getConf(ORC_VECTORIZED_READER_ENABLED)
+
   def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE)
 
   def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)
```
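The flag defaults to `false`, so the vectorized ORC path is strictly opt-in. A sketch of enabling it for a session, assuming a Spark build with Hive support (which the ORC data source required at the time); the input path is a placeholder:

```java
import org.apache.spark.sql.SparkSession;

// Sketch: opt in to the vectorized ORC reader for this session.
SparkSession spark = SparkSession.builder()
    .appName("orc-vectorized-demo")
    .enableHiveSupport()
    .getOrCreate();
spark.conf().set("spark.sql.orc.enableVectorizedReader", "true");

// "/path/to/table" is a placeholder; any ORC dataset works.
spark.read().orc("/path/to/table").show();
```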