Skip to content
Permalink
Browse files
HIVE-26169: Set non-vectorized mode as default when accessing iceberg…
… tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
  • Loading branch information
lcspinter committed Apr 26, 2022
1 parent 4c7dbd5 commit dbdcf00dd6334acaded4369fc0c1ccbdd142255e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 7 deletions.
@@ -78,6 +78,7 @@
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
@@ -167,14 +168,14 @@ public HiveAuthorizationProvider getAuthorizationProvider() {
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> map) {
// Overlay the table-level properties using the handler's conf, the table descriptor and the job property map
// (presumably copies them into `map` -- confirm against overlayTableProperties)
overlayTableProperties(conf, tableDesc, map);
// Fall back to non-vectorized mode for V2 tables (the vectorized reader cannot handle delete files yet)
// and for tables whose file format is set to Avro
// NOTE(review): this view is a rendered diff -- the ForV2 call below looks like the removed line and the
// BasedOnProperties call its replacement; confirm only the latter exists in the real file
fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
}

@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
overlayTableProperties(conf, tableDesc, map);
// Fall back to non-vectorized mode for V2 tables (until the vectorized reader can handle delete files) and for Avro tables
fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
// For Tez, setting the committer here is enough to make sure it'll be part of the jobConf
map.put("mapred.output.committer.class", HiveIcebergNoJobCommitter.class.getName());
// For MR, the jobConf is set only in configureJobConf, so we're setting the write key here to detect it over there
@@ -744,8 +745,17 @@ private String collectColumnAndReplaceDummyValues(ExprNodeDesc node, String foun
return column;
}

private void fallbackToNonVectorizedModeForV2(Properties tableProps) {
if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION))) {
/**
 * Disables Hive vectorized execution in the handler's configuration when the table properties describe a
 * table that has to be read in non-vectorized mode.
 * If any of the following checks is true we fall back to non vectorized mode:
 * <ul>
 * <li>iceberg format-version is "2"</li>
 * <li>fileformat is set to avro (compared case-insensitively)</li>
 * </ul>
 * Both values are read with {@link Properties#getProperty(String)}, so entries that are only present in the
 * defaults of the {@code Properties} object are taken into account as well. When neither check matches, the
 * configuration is left untouched.
 * @param tableProps table properties, must be not null
 */
private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) {
  // Use getProperty for both lookups: Properties.get only sees directly stored entries and skips defaults
  boolean isV2Table = "2".equals(tableProps.getProperty(TableProperties.FORMAT_VERSION));
  boolean isAvroTable =
      FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT));
  if (isV2Table || isAvroTable) {
    conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
  }
}
@@ -112,8 +112,7 @@ public static Collection<Object[]> parameters() {
if (javaVersion.equals("1.8")) {
testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false});
// test for vectorization=ON with the Tez engine for every file format except METADATA
if ((fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) &&
"tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, true});
}
}
@@ -63,7 +63,6 @@ public void testScanTable() throws IOException {
@Test
public void testCBOWithSelectedColumnsNonOverlapJoin() throws IOException {
shell.setHiveSessionValue("hive.cbo.enable", true);

testTables.createTable(shell, "products", PRODUCT_SCHEMA, fileFormat, PRODUCT_RECORDS);
testTables.createTable(shell, "orders", ORDER_SCHEMA, fileFormat, ORDER_RECORDS);

@@ -190,6 +189,7 @@ public void testSpecialCharacters() {

@Test
public void testScanTableCaseInsensitive() throws IOException {
shell.setHiveSessionValue(InputFormatConfig.CASE_SENSITIVE, false);
testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA_WITH_UPPERCASE, fileFormat,
HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);

0 comments on commit dbdcf00

Please sign in to comment.