From 880ac4d90047f47f9b705ccb33a77b02fda18bc3 Mon Sep 17 00:00:00 2001 From: Laszlo Bodor Date: Mon, 5 Aug 2019 14:33:13 +0200 Subject: [PATCH] ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true) --- .../org/apache/orc/impl/RecordReaderImpl.java | 11 +++++++- .../org/apache/orc/impl/SchemaEvolution.java | 15 ++++++++-- .../apache/orc/impl/TestRecordReaderImpl.java | 28 +++++++++++++++++-- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index d6ae7d5b38..de5957955a 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -97,8 +97,17 @@ public class RecordReaderImpl implements RecordReader { static int findColumns(SchemaEvolution evolution, String columnName) { try { - return evolution.getFileSchema().findSubtype(columnName).getId(); + final TypeDescription targetSchema; + if (evolution.getPositionalColumns()) { + targetSchema = evolution.getReaderBaseSchema(); + } else { + targetSchema = evolution.getFileSchema(); + } + return targetSchema.findSubtype(columnName).getId(); } catch (IllegalArgumentException e) { + if (LOG.isDebugEnabled()){ + LOG.debug("{}", e.getMessage()); + } return -1; } } diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java index d93c9bd598..72394ce6bb 100644 --- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java +++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java @@ -59,6 +59,9 @@ public class SchemaEvolution { // indexed by reader column id private final boolean[] ppdSafeConversion; + // columns are indexed, not named between Reader & File schema + private final boolean positionalColumns; + private static final Logger LOG = LoggerFactory.getLogger(SchemaEvolution.class); private 
static final Pattern missingMetadataPattern = @@ -82,8 +85,8 @@ public SchemaEvolution(TypeDescription fileSchema, this.hasConversion = false; this.isOnlyImplicitConversion = true; this.fileSchema = fileSchema; - isAcid = checkAcidSchema(fileSchema); - includeAcidColumns = options.getIncludeAcidColumns(); + this.isAcid = checkAcidSchema(fileSchema); + this.includeAcidColumns = options.getIncludeAcidColumns(); this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0; if (readerSchema != null) { if (isAcid) { @@ -134,6 +137,7 @@ public SchemaEvolution(TypeDescription fileSchema, } buildIdentityConversion(this.readerSchema); } + this.positionalColumns = options.getForcePositionalEvolution(); this.ppdSafeConversion = populatePpdSafeConversion(); } @@ -236,6 +240,13 @@ public boolean[] getReaderIncluded() { public boolean[] getFileIncluded() { return fileIncluded; } + + /** + * Get whether the columns are handled via position or name + */ + public boolean getPositionalColumns() { + return this.positionalColumns; + } /** * Determine if there is implicit conversion from a file to reader type. 
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 0763363b0b..db0dbd031c 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -68,6 +68,7 @@ import org.apache.orc.util.BloomFilter; import org.apache.orc.DataReader; import org.apache.orc.RecordReader; +import org.apache.orc.TestVectorOrcFile; import org.apache.orc.TypeDescription; import org.apache.orc.Writer; import org.apache.orc.impl.RecordReaderImpl.Location; @@ -84,8 +85,6 @@ import org.apache.orc.util.BloomFilterIO; import org.apache.orc.util.BloomFilterUtf8; import org.junit.Test; -import org.mockito.MockSettings; -import org.mockito.Mockito; public class TestRecordReaderImpl { @@ -102,6 +101,31 @@ public void testFindColumn() throws Exception { assertEquals(3, RecordReaderImpl.findColumns(evo, "e")); } + @Test + public void testForcePositionalEvolution() throws Exception { + Configuration conf = new Configuration(); + + Path oldFilePath = new Path(TestVectorOrcFile.getFileFromClasspath("orc-file-11-format.orc")); + Reader reader = OrcFile.createReader(oldFilePath, + OrcFile.readerOptions(conf).filesystem(FileSystem.getLocal(conf))); + + TypeDescription fileSchema = + TypeDescription.fromString("struct<col0:boolean,col1:tinyint,col2:smallint," + "col3:int,col4:bigint,col5:float,col6:double,col7:" + "binary,col8:string,col9:struct<list:array<struct<int1:int," + "string1:string>>>,col10:array<struct<int1:int,string1:string>>," + "col11:map<string,struct<int1:int,string1:string>>,col12:timestamp," + "col13:decimal(38,10)>"); + + SchemaEvolution evo = new SchemaEvolution(fileSchema, reader.getSchema(), + new Reader.Options(conf).forcePositionalEvolution(true)); + assertEquals(4, RecordReaderImpl.findColumns(evo, "int1")); + + evo = new SchemaEvolution(fileSchema, reader.getSchema(), + new Reader.Options(conf).forcePositionalEvolution(false)); + assertEquals(-1, RecordReaderImpl.findColumns(evo, "int1")); + } + /** * Create a predicate leaf. This is used by another test. */