DRILL-5864 Selecting a non-existing field from a MapR-DB JSON table fai… #1007

Closed · wants to merge 1 commit
6 changes: 6 additions & 0 deletions contrib/format-maprdb/README.md
@@ -1,2 +1,8 @@
drill-mapr-plugin
=================
By default, all the tests in contrib/format-maprdb are disabled.
To enable and run these tests, use the -Pmapr profile to
compile and execute them.

Here is an example of the mvn command used to run these tests:
    mvn install -Dtests=cluster -Pmapr
@@ -32,6 +32,12 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig {
public boolean ignoreSchemaChange = false;
public boolean readAllNumbersAsDouble = false;
public boolean disableCountOptimization = false;
/* This flag is a switch to do special handling when none of the columns
 * in the query exist in the MapR-DB table. This flag can be deprecated
 * once it is observed that this special handling does not regress the
 * performance of reading a MapR-DB table.
 */
public boolean nonExistentFieldSupport = true;

@Override
public int hashCode() {
@@ -40,6 +46,7 @@ public int hashCode() {
result = 31 * result + (ignoreSchemaChange ? 1231 : 1237);
result = 31 * result + (readAllNumbersAsDouble ? 1231 : 1237);
result = 31 * result + (disableCountOptimization ? 1231 : 1237);
result = 31 * result + (nonExistentFieldSupport ? 1231 : 1237);
return result;
}

@@ -56,6 +63,8 @@ protected boolean impEquals(Object obj) {
return false;
} else if (disableCountOptimization != other.disableCountOptimization) {
return false;
} else if (nonExistentFieldSupport != other.nonExistentFieldSupport) {
return false;
}
return true;
}
@@ -76,6 +85,8 @@ public boolean isEnablePushdown() {
return enablePushdown;
}

public boolean isNonExistentFieldSupport() {
return nonExistentFieldSupport;
}

public boolean isIgnoreSchemaChange() {
return ignoreSchemaChange;
}
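For illustration only (not part of this patch), here is a minimal Java sketch of how the new option is exercised through the class above; it assumes MapRDBFormatPluginConfig is available on the classpath and relies only on the public field and getter added in this change:

    // Hypothetical usage sketch, not part of this patch.
    MapRDBFormatPluginConfig config = new MapRDBFormatPluginConfig();

    // The flag defaults to true, so a field that exists in no scanned document
    // is still projected as a null-filled vector instead of failing the query.
    boolean projectMissing = config.isNonExistentFieldSupport();   // true

    // Opt out (for example in the maprdb format configuration) if this special
    // handling is ever observed to regress MapR-DB read performance.
    config.nonExistentFieldSupport = false;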
@@ -26,6 +26,7 @@
import java.util.List;
import java.util.Set;
import java.util.Stack;
import java.util.Collections;
import java.util.concurrent.TimeUnit;

import org.apache.drill.common.exceptions.ExecutionSetupException;
@@ -44,6 +45,7 @@
import org.apache.drill.exec.util.Utilities;
import org.apache.drill.exec.vector.BaseValueVector;
import org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl;
import org.apache.drill.exec.vector.complex.fn.JsonReaderUtils;
import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
import org.ojai.DocumentReader;
import org.ojai.DocumentReader.EventType;
@@ -95,6 +97,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader {
private final boolean allTextMode;
private final boolean ignoreSchemaChange;
private final boolean disableCountOptimization;
private final boolean nonExistentColumnsProjection;

public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec,
MapRDBFormatPluginConfig formatPluginConfig,
@@ -119,6 +122,7 @@ public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec,
allTextMode = formatPluginConfig.isAllTextMode();
ignoreSchemaChange = formatPluginConfig.isIgnoreSchemaChange();
disablePushdown = !formatPluginConfig.isEnablePushdown();
nonExistentColumnsProjection = formatPluginConfig.isNonExistentFieldSupport();
}

@Override
@@ -230,6 +234,9 @@ public int next() {
}
}

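// If the projected columns were not found in any of the scanned documents,
// materialize default (null-filled) vectors so downstream operators still
// see the requested fields instead of an empty schema.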
if (nonExistentColumnsProjection && recordCount > 0) {
JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.EMPTY_LIST);
}
vectorWriter.setValueCount(recordCount);
logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
return recordCount;
@@ -57,6 +57,16 @@ public void testSelectId() throws Exception {
runSQLAndVerifyCount(sql, 10);
}

@Test
public void testSelectNonExistentColumns() throws Exception {
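// Column `something` does not exist in the business table; with
// nonExistentFieldSupport enabled the query should still return all
// 5 rows instead of failing.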
setColumnWidths(new int[] {23});
final String sql = "SELECT\n"
+ " something\n"
+ "FROM\n"
+ " hbase.business business limit 5";
runSQLAndVerifyCount(sql, 5);
}

@Test
public void testKVGen() throws Exception {
setColumnWidths(new int[] {21, 10, 6});
@@ -104,71 +104,7 @@ public JsonReader(DrillBuf managedBuf, List<SchemaPath> columns,
@SuppressWarnings("resource")
@Override
public void ensureAtLeastOneField(ComplexWriter writer) {
List<BaseWriter.MapWriter> writerList = Lists.newArrayList();
List<PathSegment> fieldPathList = Lists.newArrayList();
BitSet emptyStatus = new BitSet(columns.size());

// first pass: collect which fields are empty
for (int i = 0; i < columns.size(); i++) {
SchemaPath sp = columns.get(i);
PathSegment fieldPath = sp.getRootSegment();
BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
fieldPath = fieldPath.getChild();
}
writerList.add(fieldWriter);
fieldPathList.add(fieldPath);
if (fieldWriter.isEmptyMap()) {
emptyStatus.set(i, true);
}
if (i == 0 && !allTextMode) {
// when allTextMode is false, there is not much benefit to producing all
// the empty
// fields; just produce 1 field. The reason is that the type of the
// fields is
// unknown, so if we produce multiple Integer fields by default, a
// subsequent batch
// that contains non-integer fields will error out in any case. Whereas,
// with
// allTextMode true, we are sure that all fields are going to be treated
// as varchar,
// so it makes sense to produce all the fields, and in fact is necessary
// in order to
// avoid schema change exceptions by downstream operators.
break;
}

}

// second pass: create default typed vectors corresponding to empty fields
// Note: this is not easily do-able in 1 pass because the same fieldWriter
// may be
// shared by multiple fields whereas we want to keep track of all fields
// independently,
// so we rely on the emptyStatus.
for (int j = 0; j < fieldPathList.size(); j++) {
BaseWriter.MapWriter fieldWriter = writerList.get(j);
PathSegment fieldPath = fieldPathList.get(j);
if (emptyStatus.get(j)) {
if (allTextMode) {
fieldWriter.varChar(fieldPath.getNameSegment().getPath());
} else {
fieldWriter.integer(fieldPath.getNameSegment().getPath());
}
}
}

for (ListWriter field : emptyArrayWriters) {
// checks that array has not been initialized
if (field.getValueCapacity() == 0) {
if (allTextMode) {
field.varChar();
} else {
field.integer();
}
}
}
JsonReaderUtils.ensureAtLeastOneField(writer, columns, allTextMode, emptyArrayWriters);
}

public void setSource(int start, int end, DrillBuf buf) throws IOException {
@@ -0,0 +1,94 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.vector.complex.fn;

import com.google.common.collect.Lists;
import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.vector.complex.writer.BaseWriter;

import java.util.BitSet;
import java.util.Collection;
import java.util.List;

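/**
 * Shared helpers extracted from {@link JsonReader} so that other readers
 * (e.g. the MapR-DB JSON record reader) can materialize default vectors
 * for projected fields that are absent from the data being scanned.
 */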
public class JsonReaderUtils {

public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer,
Collection<SchemaPath> columns,
boolean allTextMode,
List<BaseWriter.ListWriter> emptyArrayWriters) {

List<BaseWriter.MapWriter> writerList = Lists.newArrayList();
List<PathSegment> fieldPathList = Lists.newArrayList();
BitSet emptyStatus = new BitSet(columns.size());
int i = 0;

// first pass: collect which fields are empty
for (SchemaPath sp : columns) {
PathSegment fieldPath = sp.getRootSegment();
BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
fieldPath = fieldPath.getChild();
}
writerList.add(fieldWriter);
fieldPathList.add(fieldPath);
if (fieldWriter.isEmptyMap()) {
emptyStatus.set(i, true);
}
if (i == 0 && !allTextMode) {
// when allTextMode is false, there is not much benefit to producing all
// the empty fields; just produce 1 field. The reason is that the type of the
// fields is unknown, so if we produce multiple Integer fields by default, a
// subsequent batch that contains non-integer fields will error out in any case.
// Whereas, with allTextMode true, we are sure that all fields are going to be
// treated as varchar, so it makes sense to produce all the fields, and in fact
// is necessary in order to avoid schema change exceptions by downstream operators.
break;
}
i++;
}

// second pass: create default typed vectors corresponding to empty fields
// Note: this is not easily do-able in 1 pass because the same fieldWriter
// may be shared by multiple fields whereas we want to keep track of all fields
// independently, so we rely on the emptyStatus.
for (int j = 0; j < fieldPathList.size(); j++) {
BaseWriter.MapWriter fieldWriter = writerList.get(j);
PathSegment fieldPath = fieldPathList.get(j);
if (emptyStatus.get(j)) {
if (allTextMode) {
fieldWriter.varChar(fieldPath.getNameSegment().getPath());
} else {
fieldWriter.integer(fieldPath.getNameSegment().getPath());
}
}
}

for (BaseWriter.ListWriter field : emptyArrayWriters) {
// checks that array has not been initialized
if (field.getValueCapacity() == 0) {
if (allTextMode) {
field.varChar();
} else {
field.integer();
}
}
}
}
}