From 5f40587dddef9193b499927957cd5784f8d24464 Mon Sep 17 00:00:00 2001 From: Mehant Baid Date: Tue, 22 Sep 2015 13:09:51 -0700 Subject: [PATCH 1/4] DRILL-3817: Disable rewriting compound identifier for refresh metadata query --- .../sql/parser/CompoundIdentifierConverter.java | 1 + .../exec/store/parquet/TestParquetMetadataCache.java | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java index ebe6d396910..3e4c59c6e1f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java @@ -161,6 +161,7 @@ RewriteType[] should be R(D, E, D, D). rules.put(SqlJoin.class, R(D, D, D, D, D, E)); rules.put(SqlOrderBy.class, R(D, E, D, D)); rules.put(SqlDropTable.class, R(D)); + rules.put(SqlRefreshMetadata.class, R(D)); REWRITE_RULES = ImmutableMap.copyOf(rules); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java index ccaa9e728c5..5a88cf94729 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java @@ -54,6 +54,16 @@ public void testUpdate() throws Exception { Assert.assertEquals(50, rowCount); } + @Test + public void testCacheWithSubschema() throws Exception { + String tableName = "nation_ctas_subschema"; + test(String.format("create table dfs_test.tmp.`%s/t1` as select * from cp.`tpch/nation.parquet`", tableName)); + test(String.format("refresh table metadata dfs_test.tmp.%s", tableName)); + checkForMetadataFile(tableName); + int rowCount = testSql(String.format("select * from dfs_test.tmp.%s", tableName)); + Assert.assertEquals(25, rowCount); + } + private void checkForMetadataFile(String table) throws Exception { String tmpDir = getDfsTestTmpSchemaLocation(); String metaFile = Joiner.on("/").join(tmpDir, table, Metadata.METADATA_FILENAME); From 09859085fb5bbd88ac2a6012050cc63164045ebf Mon Sep 17 00:00:00 2001 From: Mehant Baid Date: Tue, 22 Sep 2015 13:13:22 -0700 Subject: [PATCH 2/4] DRILL-3819: Remove redundant check to ignore files beginning with '.' --- .../java/org/apache/drill/exec/store/dfs/DrillPathFilter.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/DrillPathFilter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/DrillPathFilter.java index 5c2d71a1d87..00f463d1cf8 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/DrillPathFilter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/DrillPathFilter.java @@ -29,9 +29,6 @@ public boolean accept(Path path) { if (path.getName().startsWith(DrillFileSystem.DOT_FILE_PREFIX)) { return false; } - if (path.getName().startsWith(".")) { - return false; - } return super.accept(path); } } From e87ae8669badc4c0091cbb2041632001ac9e1618 Mon Sep 17 00:00:00 2001 From: Mehant Baid Date: Thu, 24 Sep 2015 16:35:47 -0700 Subject: [PATCH 3/4] DRILL-3788: Expand the file selection to contain all files within the directory while creating DynamicDrillTable --- .../FileSystemPartitionDescriptor.java | 3 +- .../store/parquet/ParquetFormatPlugin.java | 2 +- .../exec/store/parquet/ParquetGroupScan.java | 2 +- .../apache/drill/TestCTASPartitionFilter.java | 2 +- .../parquet/TestParquetMetadataCache.java | 40 ++++++++++++++++++- 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java index c10d0af569b..fd2f850e8c3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java @@ -28,6 +28,7 @@ import com.google.common.collect.Maps; import org.apache.calcite.util.BitSets; +import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.types.TypeProtos; import org.apache.drill.common.types.Types; @@ -95,7 +96,7 @@ int record = 0; for (PartitionLocation partitionLocation: partitions) { for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) { if (partitionLocation.getPartitionValue(partitionColumnIndex) == null) { - ((NullableVarCharVector) vectors[partitionColumnIndex]).getMutator().setNull(record); + throw new DrillRuntimeException("Value for directory cannot be null"); } else { byte[] bytes = (partitionLocation.getPartitionValue(partitionColumnIndex)).getBytes(Charsets.UTF_8); ((NullableVarCharVector) vectors[partitionColumnIndex]).getMutator().setSafe(record, bytes, 0, bytes.length); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java index 3b7839a154a..e72f4a8b16d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java @@ -206,7 +206,7 @@ public FormatSelection isReadable(DrillFileSystem fs, FileSelection selection) t // TODO: we only check the first file for directory reading. This is because if(selection.containsDirectories(fs)){ if(isDirReadable(fs, selection.getFirstPath(fs))){ - return new FormatSelection(plugin.getConfig(), selection); + return new FormatSelection(plugin.getConfig(), selection.minusDirectories(fs)); } } return super.isReadable(fs, selection); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java index 7800721401d..3080d11ec86 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java @@ -497,7 +497,7 @@ private void init() throws IOException { } else { Path p = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)); Path metaPath = new Path(p, Metadata.METADATA_FILENAME); - if (fs.exists(metaPath)) { + if (fs.exists(metaPath) && fileSet != null) { parquetTableMetadata = removeUnneededRowGroups(Metadata.readBlockMeta(fs, metaPath.toString())); } else { fileStatuses = Lists.newArrayList(); diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java b/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java index 9886024fbc3..1f53a81a367 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java @@ -74,7 +74,7 @@ public void testDRILL3410() throws Exception { test("use dfs_test.tmp"); test(String.format("create table drill_3410 partition by (o_orderpriority) as select * from dfs_test.`%s/multilevel/parquet`", TEST_RES_PATH)); String query = "select * from drill_3410 where (o_orderpriority = '1-URGENT' and o_orderkey = 10) or (o_orderpriority = '2-HIGH' or o_orderkey = 11)"; - testIncludeFilter(query, 1, "Filter", 34); + testIncludeFilter(query, 5, "Filter", 34); } @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java index 5a88cf94729..12158c979a9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java @@ -18,17 +18,53 @@ package org.apache.drill.exec.store.parquet; import com.google.common.base.Joiner; +import org.apache.commons.io.filefilter.IOFileFilter; import org.apache.drill.BaseTestQuery; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; +import org.apache.drill.PlanTestBase; +import org.apache.drill.common.util.TestTools; +import org.apache.commons.io.FileUtils; +import org.apache.drill.exec.store.dfs.DrillPathFilter; import org.apache.hadoop.fs.Path; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import java.io.File; import java.nio.file.Files; +import static org.junit.Assert.assertEquals; + public class TestParquetMetadataCache extends BaseTestQuery { + private static final String WORKING_PATH = TestTools.getWorkingPath(); + private static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources"; + private static final String tableName = "parquetTable"; + + + @BeforeClass + public static void copyData() throws Exception { + // copy the data into the temporary location + String tmpLocation = getDfsTestTmpSchemaLocation(); + File dataDir = new File(tmpLocation + Path.SEPARATOR + tableName); + dataDir.mkdir(); + FileUtils.copyDirectory(new File(String.format(String.format("%s/multilevel/parquet", TEST_RES_PATH))), + dataDir); + } + + @Test + public void testPartitionPruningWithMetadataCache() throws Exception { + test(String.format("refresh table metadata dfs_test.`%s/%s`", getDfsTestTmpSchemaLocation(), tableName)); + checkForMetadataFile(tableName); + String query = String.format("select dir0, dir1, o_custkey, o_orderdate from dfs_test.`%s/%s` " + + " where dir0=1994 and dir1='Q1'", + getDfsTestTmpSchemaLocation(), tableName); + int expectedRowCount = 10; + int expectedNumFiles = 1; + + int actualRowCount = testSql(query); + assertEquals(expectedRowCount, actualRowCount); + String numFilesPattern = "numFiles=" + expectedNumFiles; + PlanTestBase.testPlanMatchingPatterns(query, new String[]{numFilesPattern}, new String[] {"Filter"}); + } @Test public void testCache() throws Exception { From 92dad4f208f0569f7ad148f4d5b0122510df8d4d Mon Sep 17 00:00:00 2001 From: Aman Sinha Date: Wed, 30 Sep 2015 01:43:02 -0700 Subject: [PATCH 4/4] DRILL-3788: part 2: When building the metadata file path for the single entry case, check if the entry is a directory. --- .../org/apache/drill/exec/store/dfs/FileSelection.java | 4 ++-- .../drill/exec/store/parquet/ParquetGroupScan.java | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java index 91f5a954e1d..d5deefa6843 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java @@ -72,7 +72,7 @@ public FileSelection(List statuses, String selectionRoot) { public boolean containsDirectories(DrillFileSystem fs) throws IOException { init(fs); for (FileStatus p : statuses) { - if (p.isDir()) { + if (p.isDirectory()) { return true; } } @@ -83,7 +83,7 @@ public FileSelection minusDirectories(DrillFileSystem fs) throws IOException { init(fs); List newList = Lists.newArrayList(); for (FileStatus p : statuses) { - if (p.isDir()) { + if (p.isDirectory()) { List statuses = fs.list(true, p.getPath()); for (FileStatus s : statuses) { newList.add(s); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java index 3080d11ec86..a44daf2a5f0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java @@ -488,8 +488,13 @@ private void init() throws IOException { List fileStatuses = null; if (entries.size() == 1) { Path p = Path.getPathWithoutSchemeAndAuthority(new Path(entries.get(0).getPath())); - Path metaPath = new Path(p, Metadata.METADATA_FILENAME); - if (fs.exists(metaPath)) { + Path metaPath = null; + if (fs.isDirectory(p)) { + // Using the metadata file makes sense when querying a directory; otherwise + // if querying a single file we can look up the metadata directly from the file + metaPath = new Path(p, Metadata.METADATA_FILENAME); + } + if (metaPath != null && fs.exists(metaPath)) { parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString()); } else { parquetTableMetadata = Metadata.getParquetTableMetadata(fs, p.toString());