From 0c07e2857f09e6f262b246e795f10901ed744e3d Mon Sep 17 00:00:00 2001 From: Smidth Panchamia Date: Mon, 28 Nov 2016 13:59:34 -0800 Subject: [PATCH 1/2] DRILL-5065 - Optimize count( * ) queries on MapR-DB JSON Tables In MapR-DB v5.2.0, we enabled '_id' only projection for JSON tables. Hence, we can now optimize the following queries: a. count(*) by projecting only the '_id' column. b. '_id' only projections, including count(_id) Change the format plugin config parameter name. --- .../mapr/db/MapRDBFormatPluginConfig.java | 12 ++++ .../mapr/db/json/MaprDBJsonRecordReader.java | 64 +++++++++++-------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java index 1bb07ed0be6..8c31ce7600f 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java @@ -31,6 +31,7 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig { public boolean enablePushdown = true; public boolean ignoreSchemaChange = false; public boolean readAllNumbersAsDouble = false; + public boolean disableCountOptimization = false; @Override public int hashCode() { @@ -48,6 +49,8 @@ protected boolean impEquals(Object obj) { return false; } else if (enablePushdown != other.enablePushdown) { return false; + } else if (disableCountOptimization != other.disableCountOptimization) { + return false; } return true; } @@ -65,6 +68,15 @@ public void setAllTextMode(boolean mode) { allTextMode = mode; } + @JsonProperty("disableCountOptimization") + public void setReadDocumentForCount(boolean mode) { + disableCountOptimization = mode; + } + + public boolean shouldDisableCountOptimization() { + return disableCountOptimization; + } + @JsonProperty("readAllNumbersAsDouble") public void setReadAllNumbersAsDouble(boolean read) { readAllNumbersAsDouble = read; diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java index ee6b15db408..3105bec4146 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java @@ -18,6 +18,7 @@ package org.apache.drill.exec.store.mapr.db.json; import static org.ojai.DocumentConstants.ID_KEY; +import static org.ojai.DocumentConstants.ID_FIELD; import java.nio.ByteBuffer; import java.util.Collection; @@ -93,6 +94,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader { private boolean disablePushdown; private final boolean allTextMode; private final boolean ignoreSchemaChange; + private final boolean disableCountOptimization; public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, MapRDBFormatPluginConfig formatPluginConfig, @@ -110,6 +112,7 @@ public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, condition = com.mapr.db.impl.ConditionImpl.parseFrom(ByteBufs.wrap(serializedFilter)); } + disableCountOptimization = formatPluginConfig.shouldDisableCountOptimization(); setColumns(projectedColumns); unionEnabled = context.getOptions().getOption(ExecConstants.ENABLE_UNION_TYPE); readNumbersAsDouble = formatPluginConfig.isReadAllNumbersAsDouble(); @@ -121,36 +124,47 @@ public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, @Override protected Collection transformColumns(Collection columns) { Set transformed = Sets.newLinkedHashSet(); - if (!isStarQuery() && !disablePushdown) { - Set projectedFieldsSet = Sets.newTreeSet(); - for (SchemaPath column : columns) { - if (column.getRootSegment().getPath().equalsIgnoreCase(ID_KEY)) { - /* - * we do not include _id field in the set of projected fields - * because the DB currently can not return a document if only - * the _id field was projected. This should really be fixed in - * the DB client (Bug 21708) to avoid transferring the entire - * document when only _id is requested. - */ - // projectedFieldsList.add(ID_FIELD); - includeId = true; - } else { - projectedFieldsSet.add(getFieldPathForProjection(column)); + if (disablePushdown) { + transformed.add(AbstractRecordReader.STAR_COLUMN); + includeId = true; + return transformed; + } + + if (isStarQuery()) { + transformed.add(AbstractRecordReader.STAR_COLUMN); + includeId = true; + if (isSkipQuery()) { + // `SELECT COUNT(*)` query + if (!disableCountOptimization) { + projectedFields = new FieldPath[1]; + projectedFields[0] = ID_FIELD; } - transformed.add(column); } - if (projectedFieldsSet.size() > 0) { - projectedFields = projectedFieldsSet.toArray(new FieldPath[projectedFieldsSet.size()]); + return transformed; + } + + Set projectedFieldsSet = Sets.newTreeSet(); + for (SchemaPath column : columns) { + if (column.getRootSegment().getPath().equalsIgnoreCase(ID_KEY)) { + includeId = true; + if (!disableCountOptimization) { + projectedFieldsSet.add(ID_FIELD); + } + } else { + projectedFieldsSet.add(getFieldPathForProjection(column)); } - } else { - transformed.add(AbstractRecordReader.STAR_COLUMN); - includeId = true; + + transformed.add(column); + } + + if (projectedFieldsSet.size() > 0) { + projectedFields = projectedFieldsSet.toArray(new FieldPath[projectedFieldsSet.size()]); + } + + if (disableCountOptimization) { + idOnly = (projectedFields == null); } - /* - * (Bug 21708) if we are projecting only the id field, save that condition here. - */ - idOnly = !isStarQuery() && (projectedFields == null); return transformed; } From 0ebaad1c85095443db03a7d6f1c9a774fd99c60b Mon Sep 17 00:00:00 2001 From: Smidth Panchamia Date: Mon, 28 Nov 2016 13:59:34 -0800 Subject: [PATCH 2/2] DRILL-5065 - Optimize count( * ) queries on MapR-DB JSON Tables In MapR-DB v5.2.0, we enabled '_id' only projection for JSON tables. Hence, we can now optimize the following queries: a. count(*) by projecting only the '_id' column. b. '_id' only projections, including count(_id) Change the format plugin config parameter name. Fix setter of config parameter `disableCountOptimization` for drill-maprdb plugin --- .../mapr/db/MapRDBFormatPluginConfig.java | 12 ++++ .../mapr/db/json/MaprDBJsonRecordReader.java | 64 +++++++++++-------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java index 1bb07ed0be6..8b89b786157 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java @@ -31,6 +31,7 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig { public boolean enablePushdown = true; public boolean ignoreSchemaChange = false; public boolean readAllNumbersAsDouble = false; + public boolean disableCountOptimization = false; @Override public int hashCode() { @@ -48,6 +49,8 @@ protected boolean impEquals(Object obj) { return false; } else if (enablePushdown != other.enablePushdown) { return false; + } else if (disableCountOptimization != other.disableCountOptimization) { + return false; } return true; } @@ -65,6 +68,15 @@ public void setAllTextMode(boolean mode) { allTextMode = mode; } + @JsonProperty("disableCountOptimization") + public void setDisableCountOptimization(boolean mode) { + disableCountOptimization = mode; + } + + public boolean shouldDisableCountOptimization() { + return disableCountOptimization; + } + @JsonProperty("readAllNumbersAsDouble") public void setReadAllNumbersAsDouble(boolean read) { readAllNumbersAsDouble = read; diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java index ee6b15db408..3105bec4146 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java @@ -18,6 +18,7 @@ package org.apache.drill.exec.store.mapr.db.json; import static org.ojai.DocumentConstants.ID_KEY; +import static org.ojai.DocumentConstants.ID_FIELD; import java.nio.ByteBuffer; import java.util.Collection; @@ -93,6 +94,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader { private boolean disablePushdown; private final boolean allTextMode; private final boolean ignoreSchemaChange; + private final boolean disableCountOptimization; public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, MapRDBFormatPluginConfig formatPluginConfig, @@ -110,6 +112,7 @@ public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, condition = com.mapr.db.impl.ConditionImpl.parseFrom(ByteBufs.wrap(serializedFilter)); } + disableCountOptimization = formatPluginConfig.shouldDisableCountOptimization(); setColumns(projectedColumns); unionEnabled = context.getOptions().getOption(ExecConstants.ENABLE_UNION_TYPE); readNumbersAsDouble = formatPluginConfig.isReadAllNumbersAsDouble(); @@ -121,36 +124,47 @@ public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, @Override protected Collection transformColumns(Collection columns) { Set transformed = Sets.newLinkedHashSet(); - if (!isStarQuery() && !disablePushdown) { - Set projectedFieldsSet = Sets.newTreeSet(); - for (SchemaPath column : columns) { - if (column.getRootSegment().getPath().equalsIgnoreCase(ID_KEY)) { - /* - * we do not include _id field in the set of projected fields - * because the DB currently can not return a document if only - * the _id field was projected. This should really be fixed in - * the DB client (Bug 21708) to avoid transferring the entire - * document when only _id is requested. - */ - // projectedFieldsList.add(ID_FIELD); - includeId = true; - } else { - projectedFieldsSet.add(getFieldPathForProjection(column)); + if (disablePushdown) { + transformed.add(AbstractRecordReader.STAR_COLUMN); + includeId = true; + return transformed; + } + + if (isStarQuery()) { + transformed.add(AbstractRecordReader.STAR_COLUMN); + includeId = true; + if (isSkipQuery()) { + // `SELECT COUNT(*)` query + if (!disableCountOptimization) { + projectedFields = new FieldPath[1]; + projectedFields[0] = ID_FIELD; } - transformed.add(column); } - if (projectedFieldsSet.size() > 0) { - projectedFields = projectedFieldsSet.toArray(new FieldPath[projectedFieldsSet.size()]); + return transformed; + } + + Set projectedFieldsSet = Sets.newTreeSet(); + for (SchemaPath column : columns) { + if (column.getRootSegment().getPath().equalsIgnoreCase(ID_KEY)) { + includeId = true; + if (!disableCountOptimization) { + projectedFieldsSet.add(ID_FIELD); + } + } else { + projectedFieldsSet.add(getFieldPathForProjection(column)); } - } else { - transformed.add(AbstractRecordReader.STAR_COLUMN); - includeId = true; + + transformed.add(column); + } + + if (projectedFieldsSet.size() > 0) { + projectedFields = projectedFieldsSet.toArray(new FieldPath[projectedFieldsSet.size()]); + } + + if (disableCountOptimization) { + idOnly = (projectedFields == null); } - /* - * (Bug 21708) if we are projecting only the id field, save that condition here. - */ - idOnly = !isStarQuery() && (projectedFields == null); return transformed; }