Skip to content

Commit

Permalink
ORC-1121: Fix column conversion check bug which causes column filters …
Browse files Browse the repository at this point in the history
…to not work
  • Loading branch information
shipenglei committed Mar 4, 2022
1 parent 6b69228 commit 66aa2f8
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
13 changes: 10 additions & 3 deletions java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
public class SchemaEvolution {
// indexed by reader column id
private final TypeDescription[] readerFileTypes;
// key: file column id, value: reader column id
private final Map<Integer, Integer> typeIdsMap = new HashMap<>();
// indexed by reader column id
private final boolean[] readerIncluded;
// the offset to the first column id ignoring any ACID columns
Expand Down Expand Up @@ -126,6 +128,11 @@ public SchemaEvolution(TypeDescription fileSchema,
}
}
buildConversion(fileSchema, this.readerSchema, positionalLevels);
for (int i = 0; i < readerFileTypes.length; i++) {
if (readerFileTypes[i] != null) {
this.typeIdsMap.put(readerFileTypes[i].getId(), i);
}
}
this.positionalColumns = options.getForcePositionalEvolution();
this.ppdSafeConversion = populatePpdSafeConversion();
}
Expand Down Expand Up @@ -296,13 +303,13 @@ private boolean typesAreImplicitConversion(final TypeDescription fileType,

/**
* Check if column is safe for ppd evaluation
* @param colId reader column id
* @param colId file column id
* @return true if the specified column is safe for ppd evaluation else false
*/
public boolean isPPDSafeConversion(final int colId) {
if (hasConversion()) {
return !(colId < 0 || colId >= ppdSafeConversion.length) &&
ppdSafeConversion[colId];
Integer readerTypeId = typeIdsMap.get(colId);
return readerTypeId != null && ppdSafeConversion[readerTypeId];
}

// when there is no schema evolution PPD is safe
Expand Down
10 changes: 10 additions & 0 deletions java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,16 @@ public void testSafePpdEvaluation() throws IOException {
assertTrue(both1.isPPDSafeConversion(2));
assertTrue(both1.isPPDSafeConversion(3));
assertFalse(both1.isPPDSafeConversion(4));

// column pruning
readerStruct1 = TypeDescription.createStruct()
.addField("f2", TypeDescription.createString());
both1 = new SchemaEvolution(fileStruct1, readerStruct1, options);
assertTrue(both1.hasConversion());
assertFalse(both1.isPPDSafeConversion(0));
assertFalse(both1.isPPDSafeConversion(1));
assertTrue(both1.isPPDSafeConversion(2));
assertFalse(both1.isPPDSafeConversion(3));
}

@Test
Expand Down

0 comments on commit 66aa2f8

Please sign in to comment.