diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3b422105668b..605f6f51772b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -5693,10 +5693,6 @@ public static enum ConfVars { HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS("hive.server2.iceberg.metadata.generator.threads", 10, "Number of threads used to scan partition directories for data files and update/generate iceberg metadata"), - HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES("hive.iceberg.metadata.refresh.max.retries", 2, - "Max retry count for trying to access the metadata location in order to refresh metadata during " + - " Iceberg table load."), - /* BLOBSTORE section */ HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "s3,s3a,s3n", diff --git a/iceberg/checkstyle/checkstyle.xml b/iceberg/checkstyle/checkstyle.xml index 2911b45ed25b..a288af5de908 100644 --- a/iceberg/checkstyle/checkstyle.xml +++ b/iceberg/checkstyle/checkstyle.xml @@ -46,6 +46,21 @@ + + + + + + + + + + + + diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 4737dd64d520..b97ff3daa8b3 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -36,10 +36,12 @@ import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.iceberg.BaseMetastoreCatalog; import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.BaseTable; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.ClientPool; import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.TableOperations; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; @@ -49,49 +51,31 @@ import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespaces, Configurable { + public static final String LIST_ALL_TABLES = "list-all-tables"; + public static final String LIST_ALL_TABLES_DEFAULT = "false"; + private static final Logger LOG = LoggerFactory.getLogger(HiveCatalog.class); private String name; private Configuration conf; private FileIO fileIO; private ClientPool clients; + private boolean listAllTables = false; public HiveCatalog() { } - /** - * Hive Catalog constructor. - * - * @param conf Hadoop Configuration - * @deprecated please use the no-arg constructor, setConf and initialize to construct the catalog. 
Will be removed in - * v0.13.0 - */ - @Deprecated - public HiveCatalog(Configuration conf) { - this.name = "hive"; - this.conf = conf; - this.fileIO = new HadoopFileIO(conf); - Map properties = ImmutableMap.of( - CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, - conf.get(CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, - String.valueOf(CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS_DEFAULT)), - CatalogProperties.CLIENT_POOL_SIZE, - conf.get(CatalogProperties.CLIENT_POOL_SIZE, - String.valueOf(CatalogProperties.CLIENT_POOL_SIZE_DEFAULT)) - ); - this.clients = new CachedClientPool(conf, properties); - } - @Override public void initialize(String inputName, Map properties) { this.name = inputName; @@ -108,6 +92,8 @@ public void initialize(String inputName, Map properties) { this.conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, properties.get(CatalogProperties.WAREHOUSE_LOCATION)); } + this.listAllTables = Boolean.parseBoolean(properties.getOrDefault(LIST_ALL_TABLES, LIST_ALL_TABLES_DEFAULT)); + String fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); this.fileIO = fileIOImpl == null ? new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); @@ -122,12 +108,20 @@ public List listTables(Namespace namespace) { try { List tableNames = clients.run(client -> client.getAllTables(database)); - List tableObjects = clients.run(client -> client.getTableObjectsByName(database, tableNames)); - List tableIdentifiers = tableObjects.stream() - .filter(table -> table.getParameters() == null ? false : BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE - .equalsIgnoreCase(table.getParameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP))) - .map(table -> TableIdentifier.of(namespace, table.getTableName())) - .collect(Collectors.toList()); + List tableIdentifiers; + + if (listAllTables) { + tableIdentifiers = tableNames.stream() + .map(t -> TableIdentifier.of(namespace, t)) + .collect(Collectors.toList()); + } else { + List
tableObjects = clients.run(client -> client.getTableObjectsByName(database, tableNames)); + tableIdentifiers = tableObjects.stream() + .filter(table -> table.getParameters() != null && BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE + .equalsIgnoreCase(table.getParameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP))) + .map(table -> TableIdentifier.of(namespace, table.getTableName())) + .collect(Collectors.toList()); + } LOG.debug("Listing of namespace: {} resulted in the following tables: {}", namespace, tableIdentifiers); return tableIdentifiers; @@ -235,6 +229,23 @@ public void renameTable(TableIdentifier from, TableIdentifier originalTo) { } } + @Override + public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String metadataFileLocation) { + Preconditions.checkArgument(isValidIdentifier(identifier), "Invalid identifier: %s", identifier); + + // Throw an exception if this table already exists in the catalog. + if (tableExists(identifier)) { + throw new org.apache.iceberg.exceptions.AlreadyExistsException("Table already exists: %s", identifier); + } + + TableOperations ops = newTableOps(identifier); + InputFile metadataFile = fileIO.newInputFile(metadataFileLocation); + TableMetadata metadata = TableMetadataParser.read(ops.io(), metadataFile); + ops.commit(null, metadata); + + return new BaseTable(ops, identifier.toString()); + } + @Override public void createNamespace(Namespace namespace, Map meta) { Preconditions.checkArgument( @@ -533,4 +544,9 @@ public void setConf(Configuration conf) { public Configuration getConf() { return conf; } + + @VisibleForTesting + void setListAllTables(boolean listAllTables) { + this.listAllTables = listAllTables; + } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java deleted file mode 100644 index 4d8e0133ffae..000000000000 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.iceberg.hive; - -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.iceberg.CatalogUtil; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; - -public final class HiveCatalogs { - - private static final Cache CATALOG_CACHE = Caffeine.newBuilder().build(); - - private HiveCatalogs() { - } - - /** - * @deprecated please use the no-arg constructor, setConf and initialize to construct the catalog. 
Will be removed in - * v0.12.0 - */ - @Deprecated - public static HiveCatalog loadCatalog(Configuration conf) { - // metastore URI can be null in local mode - String metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); - return CATALOG_CACHE.get(metastoreUri, uri -> (HiveCatalog) - CatalogUtil.loadCatalog(HiveCatalog.class.getName(), "hive", ImmutableMap.of(), conf)); - } -} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveClientPool.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveClientPool.java index e322792ffcb8..e9a3c53519be 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveClientPool.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveClientPool.java @@ -21,6 +21,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -35,9 +37,8 @@ public class HiveClientPool extends ClientPoolImpl // use appropriate ctor depending on whether we're working with Hive1, Hive2, or Hive3 dependencies // we need to do this because there is a breaking API change between Hive1, Hive2, and Hive3 private static final DynMethods.StaticMethod GET_CLIENT = DynMethods.builder("getProxy") - .impl(RetryingMetaStoreClient.class, HiveConf.class) - .impl(RetryingMetaStoreClient.class, HiveConf.class, Boolean.TYPE) - .impl(RetryingMetaStoreClient.class, Configuration.class, Boolean.TYPE) + .impl(RetryingMetaStoreClient.class, HiveConf.class, HiveMetaHookLoader.class, String.class) // Hive 1 and 2 + .impl(RetryingMetaStoreClient.class, Configuration.class, HiveMetaHookLoader.class, String.class) // Hive 3 .buildStatic(); private final HiveConf hiveConf; @@ -53,7 +54,7 @@ public HiveClientPool(int poolSize, Configuration conf) { protected IMetaStoreClient newClient() { try { try { - return GET_CLIENT.invoke(hiveConf, true); + return GET_CLIENT.invoke(hiveConf, (HiveMetaHookLoader) tbl -> null, HiveMetaStoreClient.class.getName()); } catch (RuntimeException e) { // any MetaException would be wrapped into RuntimeException during reflection, so let's double-check type here if (e.getCause() instanceof MetaException) { diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java index 5968693cce29..422546c11277 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java @@ -19,7 +19,6 @@ package org.apache.iceberg.hive; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -30,6 +29,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.slf4j.Logger; @@ -62,7 +62,7 @@ static Type convert(TypeInfo typeInfo, boolean autoConvert) { } List 
convertInternal(List names, List typeInfos, List comments) { - List result = new ArrayList<>(names.size()); + List result = Lists.newArrayListWithExpectedSize(names.size()); for (int i = 0; i < names.size(); ++i) { result.add(Types.NestedField.optional(id++, names.get(i), convertType(typeInfos.get(i)), comments.isEmpty() || i >= comments.size() ? null : comments.get(i))); diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java index cc9ad46ac15b..0ebd0571a477 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java @@ -19,7 +19,6 @@ package org.apache.iceberg.hive; -import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; @@ -31,6 +30,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -72,9 +72,9 @@ public static Schema convert(List fieldSchemas) { * @return An equivalent Iceberg Schema */ public static Schema convert(List fieldSchemas, boolean autoConvert) { - List names = new ArrayList<>(fieldSchemas.size()); - List typeInfos = new ArrayList<>(fieldSchemas.size()); - List comments = new ArrayList<>(fieldSchemas.size()); + List names = Lists.newArrayListWithExpectedSize(fieldSchemas.size()); + List typeInfos = Lists.newArrayListWithExpectedSize(fieldSchemas.size()); + List comments = Lists.newArrayListWithExpectedSize(fieldSchemas.size()); for (FieldSchema col : fieldSchemas) { names.add(col.getName()); @@ -237,10 +237,10 @@ public static Pair> getReorderedColumn(List missingFromFirst = new ArrayList<>(); - private final List missingFromSecond = new ArrayList<>(); - private final List typeChanged = new ArrayList<>(); - private final List commentChanged = new ArrayList<>(); + private final List missingFromFirst = Lists.newArrayList(); + private final List missingFromSecond = Lists.newArrayList(); + private final List typeChanged = Lists.newArrayList(); + private final List commentChanged = Lists.newArrayList(); public List getMissingFromFirst() { return missingFromFirst; diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 3ad450cc440f..6afcd7140887 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -24,7 +24,6 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.util.Collections; -import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -36,9 +35,9 @@ import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import 
org.apache.hadoop.hive.metastore.api.LockComponent; import org.apache.hadoop.hive.metastore.api.LockLevel; import org.apache.hadoop.hive.metastore.api.LockRequest; @@ -61,6 +60,7 @@ import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.NoSuchIcebergTableException; import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; @@ -86,10 +86,12 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; + private static final String HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES = "iceberg.hive.metadata-refresh-max-retries"; private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS = "iceberg.hive.table-level-lock-evict-ms"; private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes private static final long HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds private static final long HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds + private static final int HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES_DEFAULT = 2; private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); private static final BiMap ICEBERG_TO_HMS_TRANSLATION = ImmutableBiMap.of( @@ -97,9 +99,6 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { GC_ENABLED, "external.table.purge" ); - // Should be in org.apache.iceberg.hadoop.ConfigProperties, but that is not ported to Hive codebase - public static final String KEEP_HIVE_STATS = "iceberg.hive.keep.stats"; - private static Cache commitLockCache; private static synchronized void initTableLevelLockCache(long evictionTimeout) { @@ -140,6 +139,7 @@ private static class WaitingForLockException extends RuntimeException { private final long lockAcquireTimeout; private final long lockCheckMinWaitTime; private final long lockCheckMaxWaitTime; + private final int metadataRefreshMaxRetries; private final FileIO fileIO; private final ClientPool metaClients; @@ -157,6 +157,8 @@ protected HiveTableOperations(Configuration conf, ClientPool metaClients, FileIO conf.getLong(HIVE_LOCK_CHECK_MIN_WAIT_MS, HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT); this.lockCheckMaxWaitTime = conf.getLong(HIVE_LOCK_CHECK_MAX_WAIT_MS, HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT); + this.metadataRefreshMaxRetries = + conf.getInt(HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES, HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES_DEFAULT); long tableLevelLockCacheEvictionTimeout = conf.getLong(HIVE_TABLE_LEVEL_LOCK_EVICT_MS, HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT); initTableLevelLockCache(tableLevelLockCacheEvictionTimeout); @@ -195,16 +197,16 @@ protected void doRefresh() { throw new RuntimeException("Interrupted during refresh", e); } - refreshFromMetadataLocation(metadataLocation, HiveConf.getIntVar(conf, - HiveConf.ConfVars.HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES)); + refreshFromMetadataLocation(metadataLocation, metadataRefreshMaxRetries); } @SuppressWarnings("checkstyle:CyclomaticComplexity") @Override protected void doCommit(TableMetadata base, TableMetadata 
metadata) { - String newMetadataLocation = writeNewMetadata(metadata, currentVersion() + 1); + String newMetadataLocation = base == null && metadata.metadataFileLocation() != null ? + metadata.metadataFileLocation() : writeNewMetadata(metadata, currentVersion() + 1); boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf); - boolean keepHiveStats = conf.getBoolean(KEEP_HIVE_STATS, false); + boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS, false); CommitStatus commitStatus = CommitStatus.FAILURE; boolean updateHiveTable = false; @@ -253,7 +255,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { Map summary = Optional.ofNullable(metadata.currentSnapshot()) .map(Snapshot::summary) .orElseGet(ImmutableMap::of); - setHmsTableParameters(newMetadataLocation, tbl, metadata.properties(), removedProps, hiveEngineEnabled, summary); + setHmsTableParameters(newMetadataLocation, tbl, metadata, removedProps, hiveEngineEnabled, summary); if (!keepHiveStats) { StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); @@ -263,29 +265,32 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { try { persistTable(tbl, updateHiveTable); commitStatus = CommitStatus.SUCCESS; - } catch (Throwable persistFailure) { + } catch (org.apache.hadoop.hive.metastore.api.AlreadyExistsException e) { + throw new AlreadyExistsException(e, "Table already exists: %s.%s", database, tableName); + + } catch (InvalidObjectException e) { + throw new ValidationException(e, "Invalid Hive object for %s.%s", database, tableName); + + } catch (Throwable e) { + if (e.getMessage() != null && e.getMessage().contains("Table/View 'HIVE_LOCKS' does not exist")) { + throw new RuntimeException("Failed to acquire locks from metastore because the underlying metastore " + + "table 'HIVE_LOCKS' does not exist. This can occur when using an embedded metastore which does not " + + "support transactions. To fix this use an alternative metastore.", e); + } + LOG.error("Cannot tell if commit to {}.{} succeeded, attempting to reconnect and check.", - database, tableName, persistFailure); + database, tableName, e); commitStatus = checkCommitStatus(newMetadataLocation, metadata); switch (commitStatus) { case SUCCESS: break; case FAILURE: - throw persistFailure; + throw e; case UNKNOWN: - throw new CommitStateUnknownException(persistFailure); + throw new CommitStateUnknownException(e); } } - } catch (org.apache.hadoop.hive.metastore.api.AlreadyExistsException e) { - throw new AlreadyExistsException("Table already exists: %s.%s", database, tableName); - } catch (TException | UnknownHostException e) { - if (e.getMessage() != null && e.getMessage().contains("Table/View 'HIVE_LOCKS' does not exist")) { - throw new RuntimeException("Failed to acquire locks from metastore because 'HIVE_LOCKS' doesn't " + - "exist, this probably happened when using embedded metastore or doesn't create a " + - "transactional meta table. 
To fix this, use an alternative metastore", e); - } - throw new RuntimeException(String.format("Metastore operation failed for %s.%s", database, tableName), e); } catch (InterruptedException e) { @@ -332,7 +337,7 @@ private Table newHmsTable() { Integer.MAX_VALUE, null, Collections.emptyList(), - new HashMap<>(), + Maps.newHashMap(), null, null, TableType.EXTERNAL_TABLE.toString()); @@ -341,18 +346,21 @@ private Table newHmsTable() { return newTable; } - private void setHmsTableParameters(String newMetadataLocation, Table tbl, Map icebergTableProps, + private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableMetadata metadata, Set obsoleteProps, boolean hiveEngineEnabled, Map summary) { Map parameters = Optional.ofNullable(tbl.getParameters()) - .orElseGet(HashMap::new); + .orElseGet(Maps::newHashMap); // push all Iceberg table properties into HMS - icebergTableProps.forEach((key, value) -> { + metadata.properties().forEach((key, value) -> { // translate key names between Iceberg and HMS where needed String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key, key); parameters.put(hmsKey, value); }); + if (metadata.uuid() != null) { + parameters.put(TableProperties.UUID, metadata.uuid()); + } // remove any props from HMS that are no longer present in Iceberg table props obsoleteProps.forEach(parameters::remove); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java index f98e1a5d90fe..b1fb891f3054 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java @@ -19,7 +19,6 @@ package org.apache.iceberg.hive; -import java.util.HashMap; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; @@ -27,6 +26,7 @@ import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -47,7 +47,7 @@ public static void startMetastore() throws Exception { HiveMetastoreTest.hiveConf = metastore.hiveConf(); HiveMetastoreTest.metastoreClient = new HiveMetaStoreClient(hiveConf); String dbPath = metastore.getDatabasePath(DB_NAME); - Database db = new Database(DB_NAME, "description", dbPath, new HashMap<>()); + Database db = new Database(DB_NAME, "description", dbPath, Maps.newHashMap()); metastoreClient.createDatabase(db); HiveMetastoreTest.catalog = (HiveCatalog) CatalogUtil.loadCatalog(HiveCatalog.class.getName(), CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, ImmutableMap.of( @@ -55,7 +55,7 @@ public static void startMetastore() throws Exception { } @AfterClass - public static void stopMetastore() { + public static void stopMetastore() throws Exception { HiveMetastoreTest.catalog = null; metastoreClient.close(); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java index 6804fc80e5b0..4c210a956355 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java @@ -22,11 +22,9 @@ import 
java.io.File; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.SerDeInfo; @@ -35,6 +33,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hive.iceberg.org.apache.avro.generic.GenericData; import org.apache.hive.iceberg.org.apache.avro.generic.GenericRecordBuilder; +import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.Files; @@ -48,6 +47,7 @@ import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.hadoop.ConfigProperties; @@ -66,6 +66,7 @@ import static java.nio.file.attribute.PosixFilePermissions.fromString; import static org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE; import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; +import static org.apache.iceberg.BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP; import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; @@ -324,8 +325,14 @@ public void testListTables() throws TException, IOException { org.apache.hadoop.hive.metastore.api.Table hiveTable = createHiveTable(hiveTableName); metastoreClient.createTable(hiveTable); + catalog.setListAllTables(false); List tableIdents1 = catalog.listTables(TABLE_IDENTIFIER.namespace()); Assert.assertEquals("should only 1 iceberg table .", 1, tableIdents1.size()); + + catalog.setListAllTables(true); + List tableIdents2 = catalog.listTables(TABLE_IDENTIFIER.namespace()); + Assert.assertEquals("should be 2 tables in namespace .", 2, tableIdents2.size()); + Assert.assertTrue(catalog.tableExists(TABLE_IDENTIFIER)); metastoreClient.dropTable(DB_NAME, hiveTableName); } @@ -371,6 +378,51 @@ public void testNonDefaultDatabaseLocation() throws IOException, TException { metastoreClient.dropDatabase(NON_DEFAULT_DATABASE, true, true, true); } + @Test + public void testRegisterTable() throws TException { + org.apache.hadoop.hive.metastore.api.Table originalTable = metastoreClient.getTable(DB_NAME, TABLE_NAME); + + Map originalParams = originalTable.getParameters(); + Assert.assertNotNull(originalParams); + Assert.assertTrue(ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(originalParams.get(TABLE_TYPE_PROP))); + Assert.assertTrue("EXTERNAL_TABLE".equalsIgnoreCase(originalTable.getTableType())); + + catalog.dropTable(TABLE_IDENTIFIER, false); + Assert.assertFalse(catalog.tableExists(TABLE_IDENTIFIER)); + + List metadataVersionFiles = metadataVersionFiles(TABLE_NAME); + Assert.assertEquals(1, metadataVersionFiles.size()); + + catalog.registerTable(TABLE_IDENTIFIER, "file:" + metadataVersionFiles.get(0)); + + org.apache.hadoop.hive.metastore.api.Table newTable = metastoreClient.getTable(DB_NAME, TABLE_NAME); + + Map newTableParameters = newTable.getParameters(); + 
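+    // registerTable() committed the old metadata file with a null base, so the
+    // re-created HMS table should point at the same metadata_location and have
+    // no previous_metadata_location yet; the assertions below verify exactly that.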
Assert.assertNull(newTableParameters.get(PREVIOUS_METADATA_LOCATION_PROP)); + Assert.assertEquals(originalParams.get(TABLE_TYPE_PROP), newTableParameters.get(TABLE_TYPE_PROP)); + Assert.assertEquals(originalParams.get(METADATA_LOCATION_PROP), newTableParameters.get(METADATA_LOCATION_PROP)); + Assert.assertEquals(originalTable.getSd(), newTable.getSd()); + } + + @Test + public void testRegisterExistingTable() throws TException { + org.apache.hadoop.hive.metastore.api.Table originalTable = metastoreClient.getTable(DB_NAME, TABLE_NAME); + + Map originalParams = originalTable.getParameters(); + Assert.assertNotNull(originalParams); + Assert.assertTrue(ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(originalParams.get(TABLE_TYPE_PROP))); + Assert.assertTrue("EXTERNAL_TABLE".equalsIgnoreCase(originalTable.getTableType())); + + List metadataVersionFiles = metadataVersionFiles(TABLE_NAME); + Assert.assertEquals(1, metadataVersionFiles.size()); + + // Try to register an existing table + AssertHelpers.assertThrows( + "Should complain that the table already exists", AlreadyExistsException.class, + "Table already exists", + () -> catalog.registerTable(TABLE_IDENTIFIER, "file:" + metadataVersionFiles.get(0))); + } + @Test public void testEngineHiveEnabledDefault() throws TException { // Drop the previously created table to make place for the new one @@ -415,7 +467,7 @@ public void testEngineHiveEnabledTableProperty() throws TException { catalog.dropTable(TABLE_IDENTIFIER); // Enabled by table property - also check that the hive-conf is ignored - Map tableProperties = new HashMap<>(); + Map tableProperties = Maps.newHashMap(); tableProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "true"); catalog.getConf().set(ConfigProperties.ENGINE_HIVE_ENABLED, "false"); @@ -436,20 +488,19 @@ public void testEngineHiveEnabledTableProperty() throws TException { assertHiveEnabled(hmsTable, false); } - @Test(timeout = 60000, expected = NotFoundException.class) - public void testMissingMetadataWontCauseHang() throws Exception { + @Test + public void testMissingMetadataWontCauseHang() { catalog.loadTable(TABLE_IDENTIFIER); - HiveConf.setIntVar(catalog.getConf(), HiveConf.ConfVars.HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES, 3); File realLocation = new File(metadataLocation(TABLE_NAME)); File fakeLocation = new File(metadataLocation(TABLE_NAME) + "_dummy"); - realLocation.renameTo(fakeLocation); - try { - catalog.loadTable(TABLE_IDENTIFIER); - } finally { - realLocation.renameTo(realLocation); - } + Assert.assertTrue(realLocation.renameTo(fakeLocation)); + AssertHelpers.assertThrows( + "HiveTableOperations shouldn't hang indefinitely when a missing metadata file is encountered", + NotFoundException.class, + () -> catalog.loadTable(TABLE_IDENTIFIER)); + Assert.assertTrue(fakeLocation.renameTo(realLocation)); } private void assertHiveEnabled(org.apache.hadoop.hive.metastore.api.Table hmsTable, boolean expected) { diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 9518540238a4..6ba3a46a27a6 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -21,7 +21,6 @@ import java.util.List; import java.util.Map; -import java.util.UUID; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CachingCatalog; @@ 
-29,6 +28,7 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.Transaction; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; @@ -54,7 +54,6 @@ import static org.apache.iceberg.types.Types.NestedField.required; public class TestHiveCatalog extends HiveMetastoreTest { - private static final String hiveLocalDir = "file:/tmp/hive/" + UUID.randomUUID().toString(); private static ImmutableMap meta = ImmutableMap.of( "owner", "apache", "group", "iceberg", @@ -272,7 +271,7 @@ public void testCreateTableCustomSortOrder() { } @Test - public void testCreateNamespace() throws TException { + public void testCreateNamespace() throws Exception { Namespace namespace1 = Namespace.of("noLocation"); catalog.createNamespace(namespace1, meta); Database database1 = metastoreClient.getDatabase(namespace1.toString()); @@ -287,6 +286,9 @@ public void testCreateNamespace() throws TException { AlreadyExistsException.class, "Namespace '" + namespace1 + "' already exists!", () -> { catalog.createNamespace(namespace1); }); + String hiveLocalDir = temp.newFolder().toURI().toString(); + // remove the trailing slash of the URI + hiveLocalDir = hiveLocalDir.substring(0, hiveLocalDir.length() - 1); ImmutableMap newMeta = ImmutableMap.builder() .putAll(meta) .put("location", hiveLocalDir) @@ -441,4 +443,29 @@ private String defaultUri(Namespace namespace) throws TException { "hive.metastore.warehouse.external.dir", "") + "/" + namespace.level(0) + ".db"; } + @Test + public void testUUIDinTableProperties() throws Exception { + Schema schema = new Schema( + required(1, "id", Types.IntegerType.get(), "unique ID"), + required(2, "data", Types.StringType.get()) + ); + TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl"); + String location = temp.newFolder("tbl").toString(); + + try { + catalog.buildTable(tableIdentifier, schema) + .withLocation(location) + .create(); + + String tableName = tableIdentifier.name(); + org.apache.hadoop.hive.metastore.api.Table hmsTable = + metastoreClient.getTable(tableIdentifier.namespace().level(0), tableName); + + // check parameters are in expected state + Map parameters = hmsTable.getParameters(); + Assert.assertNotNull(parameters.get(TableProperties.UUID)); + } finally { + catalog.dropTable(tableIdentifier); + } + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java index 5c0a22eba988..e22374b6e975 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java @@ -24,9 +24,13 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HasTableOperations; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitStateUnknownException; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; import org.junit.Assert; @@ -77,10 +81,11 @@ public void testSuppressUnlockExceptions() throws TException, InterruptedExcepti } /** - * 
Pretends we throw an error while persisting that actually fails to commit serverside + * Pretends we throw an error while persisting, and not found with check state, commit state should be treated as + * unknown, because in reality the persisting may still succeed, just not yet by the time of checking. */ @Test - public void testThriftExceptionFailureOnCommit() throws TException, InterruptedException { + public void testThriftExceptionUnknownStateIfNotInHistoryFailureOnCommit() throws TException, InterruptedException { Table table = catalog.loadTable(TABLE_IDENTIFIER); HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); @@ -100,17 +105,15 @@ public void testThriftExceptionFailureOnCommit() throws TException, InterruptedE failCommitAndThrowException(spyOps); - AssertHelpers.assertThrows("We should rethrow generic runtime errors if the " + - "commit actually doesn't succeed", CommitStateUnknownException.class, - "Cannot determine whether the commit was successful or not, the underlying data files may " + - "or may not be needed. Manual intervention via the Remove Orphan Files Action can remove these files " + - "when a connection to the Catalog can be re-established if the commit was actually unsuccessful.", - () -> spyOps.commit(metadataV2, metadataV1)); + AssertHelpers.assertThrows("We should assume commit state is unknown if the " + + "new location is not found in history in commit state check", CommitStateUnknownException.class, + "Datacenter on fire", () -> spyOps.commit(metadataV2, metadataV1)); ops.refresh(); Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current()); Assert.assertTrue("Current metadata should still exist", metadataFileExists(metadataV2)); - Assert.assertEquals("New non-current metadata file should be added", 3, metadataFileCount(ops.current())); + Assert.assertEquals("New metadata files should still exist, new location not in history but" + + " the commit may still succeed", 3, metadataFileCount(ops.current())); } /** @@ -276,6 +279,21 @@ public void testThriftExceptionConcurrentCommit() throws TException, Interrupted 2, ops.current().schema().columns().size()); } + @Test + public void testInvalidObjectException() { + TableIdentifier badTi = TableIdentifier.of(DB_NAME, "£tbl"); + Assert.assertThrows(String.format("Invalid table name for %s.%s", DB_NAME, "`tbl`"), + ValidationException.class, + () -> catalog.createTable(badTi, schema, PartitionSpec.unpartitioned())); + } + + @Test + public void testAlreadyExistsException() { + Assert.assertThrows(String.format("Table already exists: %s.%s", DB_NAME, TABLE_NAME), + AlreadyExistsException.class, + () -> catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned())); + } + private void commitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations) throws TException, InterruptedException { // Simulate a communication error after a successful commit diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java index a956d36031d3..76eb87b21b50 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java @@ -20,6 +20,7 @@ package org.apache.iceberg.hive; import java.io.File; +import java.io.IOException; import java.util.concurrent.ExecutorService; import 
java.util.concurrent.Executors; import org.apache.hadoop.conf.Configuration; @@ -47,6 +48,7 @@ import org.apache.thrift.server.TThreadPoolServer; import org.apache.thrift.transport.TServerSocket; import org.apache.thrift.transport.TTransportFactory; +import org.junit.Assert; import static java.nio.file.Files.createTempDirectory; import static java.nio.file.attribute.PosixFilePermissions.asFileAttribute; @@ -80,9 +82,37 @@ public class TestHiveMetastore { .orNoop() .buildStatic(); - private File hiveLocalDir; - private File hiveWarehouseDir; - private File hiveExternalWarehouseDir; + // It's tricky to clear all static fields in an HMS instance in order to switch derby root dir. + // Therefore, we reuse the same derby root between tests and remove it after JVM exits. + private static final File HIVE_LOCAL_DIR; + private static final File HIVE_WAREHOUSE_DIR; + private static final File HIVE_EXTERNAL_WAREHOUSE_DIR; + private static final String DERBY_PATH; + + static { + try { + HIVE_LOCAL_DIR = createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile(); + DERBY_PATH = new File(HIVE_LOCAL_DIR, "metastore_db").getPath(); + HIVE_WAREHOUSE_DIR = new File(HIVE_LOCAL_DIR, "managed"); + HIVE_EXTERNAL_WAREHOUSE_DIR = new File(HIVE_LOCAL_DIR, "external"); + File derbyLogFile = new File(HIVE_LOCAL_DIR, "derby.log"); + System.setProperty("derby.stream.error.file", derbyLogFile.getAbsolutePath()); + setupMetastoreDB("jdbc:derby:" + DERBY_PATH + ";create=true"); + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + Path localDirPath = new Path(HIVE_LOCAL_DIR.getAbsolutePath()); + FileSystem fs = Util.getFs(localDirPath, new Configuration()); + String errMsg = "Failed to delete " + localDirPath; + try { + Assert.assertTrue(errMsg, fs.delete(localDirPath, true)); + } catch (IOException e) { + throw new RuntimeException(errMsg, e); + } + })); + } catch (Exception e) { + throw new RuntimeException("Failed to setup local dir for hive metastore", e); + } + } + private HiveConf hiveConf; private ExecutorService executorService; private TServer server; @@ -111,17 +141,6 @@ public void start(HiveConf conf) { */ public void start(HiveConf conf, int poolSize) { try { - hiveLocalDir = createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile(); - hiveWarehouseDir = new File(hiveLocalDir, "managed"); - hiveExternalWarehouseDir = new File(hiveLocalDir, "external"); - File derbyLogFile = new File(hiveLocalDir, "derby.log"); - System.setProperty("derby.stream.error.file", derbyLogFile.getAbsolutePath()); - - // create and initialize HMS backend DB for ACID and non-ACID tables as well - MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY, - "jdbc:derby:" + getDerbyPath() + ";create=true"); - TestTxnDbUtil.prepDb(conf); - TServerSocket socket = new TServerSocket(0); int port = socket.getServerSocket().getLocalPort(); initConf(conf, port); @@ -140,7 +159,8 @@ public void start(HiveConf conf, int poolSize) { } } - public void stop() { + public void stop() throws Exception { + reset(); if (clientPool != null) { clientPool.close(); } @@ -150,9 +170,6 @@ public void stop() { if (executorService != null) { executorService.shutdown(); } - if (hiveLocalDir != null) { - hiveLocalDir.delete(); - } if (baseHandler != null) { baseHandler.shutdown(); } @@ -164,29 +181,31 @@ public HiveConf hiveConf() { } public String getDatabasePath(String dbName) { - File dbDir = new File(hiveExternalWarehouseDir, dbName + ".db"); + File dbDir = new File(HIVE_LOCAL_DIR, dbName + ".db"); 
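+    // Database directories now live directly under the shared, JVM-wide
+    // HIVE_LOCAL_DIR, so reset() can sweep them together with the reused root.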
return dbDir.getPath(); } public void reset() throws Exception { - for (String dbName : clientPool.run(client -> client.getAllDatabases())) { - for (String tblName : clientPool.run(client -> client.getAllTables(dbName))) { - clientPool.run(client -> { - client.dropTable(dbName, tblName, true, true, true); - return null; - }); - } + if (clientPool != null) { + for (String dbName : clientPool.run(client -> client.getAllDatabases())) { + for (String tblName : clientPool.run(client -> client.getAllTables(dbName))) { + clientPool.run(client -> { + client.dropTable(dbName, tblName, true, true, true); + return null; + }); + } - if (!DEFAULT_DATABASE_NAME.equals(dbName)) { - // Drop cascade, functions dropped by cascade - clientPool.run(client -> { - client.dropDatabase(dbName, true, true, true); - return null; - }); + if (!DEFAULT_DATABASE_NAME.equals(dbName)) { + // Drop cascade, functions dropped by cascade + clientPool.run(client -> { + client.dropDatabase(dbName, true, true, true); + return null; + }); + } } } - Path warehouseRoot = new Path(hiveLocalDir.getAbsolutePath()); + Path warehouseRoot = new Path(HIVE_LOCAL_DIR.getAbsolutePath()); FileSystem fs = Util.getFs(warehouseRoot, hiveConf); for (FileStatus fileStatus : fs.listStatus(warehouseRoot)) { if (!fileStatus.getPath().getName().equals("derby.log") && @@ -210,7 +229,7 @@ public R run(ClientPool.Action action) thro private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf) throws Exception { HiveConf serverConf = new HiveConf(conf); - serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true"); + serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + DERBY_PATH + ";create=true"); baseHandler = HMS_HANDLER_CTOR.newInstance("new db based metaserver", serverConf); IHMSHandler handler = GET_BASE_HMS_HANDLER.invoke(serverConf, baseHandler, false); @@ -226,9 +245,9 @@ private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf con private void initConf(HiveConf conf, int port) { conf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port); - conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveWarehouseDir.getAbsolutePath()); + conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + HIVE_WAREHOUSE_DIR.getAbsolutePath()); conf.set(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname, - "file:" + hiveExternalWarehouseDir.getAbsolutePath()); + "file:" + HIVE_EXTERNAL_WAREHOUSE_DIR.getAbsolutePath()); conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false"); conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false"); conf.set("iceberg.hive.client-pool-size", "2"); @@ -237,8 +256,10 @@ private void initConf(HiveConf conf, int port) { conf.setBoolVar(HiveConf.ConfVars.HIVE_IN_TEST, false); } - private String getDerbyPath() { - File metastoreDB = new File(hiveLocalDir, "metastore_db"); - return metastoreDB.getPath(); + private static void setupMetastoreDB(String dbURL) throws Exception { + HiveConf conf = new HiveConf(); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY, + "jdbc:derby:" + DERBY_PATH + ";create=true"); + TestTxnDbUtil.prepDb(conf); } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java index e0ad70cd8a4f..a02ed0909ace 100644 --- 
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java @@ -19,7 +19,6 @@ package org.apache.iceberg.hive; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -30,6 +29,7 @@ import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -120,7 +120,7 @@ public void testNotSupportedTypes() { for (FieldSchema notSupportedField : getNotSupportedFieldSchemas()) { AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, "Unsupported Hive type", () -> { - HiveSchemaUtil.convert(new ArrayList<>(Arrays.asList(notSupportedField))); + HiveSchemaUtil.convert(Lists.newArrayList(Arrays.asList(notSupportedField))); } ); } @@ -171,7 +171,7 @@ public void testConversionWithoutLastComment() { } protected List getSupportedFieldSchemas() { - List fields = new ArrayList<>(); + List fields = Lists.newArrayList(); fields.add(new FieldSchema("c_float", serdeConstants.FLOAT_TYPE_NAME, "float comment")); fields.add(new FieldSchema("c_double", serdeConstants.DOUBLE_TYPE_NAME, "double comment")); fields.add(new FieldSchema("c_boolean", serdeConstants.BOOLEAN_TYPE_NAME, "boolean comment")); @@ -186,7 +186,7 @@ protected List getSupportedFieldSchemas() { } protected List getNotSupportedFieldSchemas() { - List fields = new ArrayList<>(); + List fields = Lists.newArrayList(); fields.add(new FieldSchema("c_byte", serdeConstants.TINYINT_TYPE_NAME, "")); fields.add(new FieldSchema("c_short", serdeConstants.SMALLINT_TYPE_NAME, "")); fields.add(new FieldSchema("c_char", serdeConstants.CHAR_TYPE_NAME + "(5)", "")); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/Deserializer.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/Deserializer.java index 47e9f3e0537d..e6ba956eb2a1 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/Deserializer.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/Deserializer.java @@ -19,8 +19,6 @@ package org.apache.iceberg.mr.hive; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -33,6 +31,7 @@ import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.data.Record; import org.apache.iceberg.mr.hive.serde.objectinspector.WriteObjectInspector; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.schema.SchemaWithPartnerVisitor; import org.apache.iceberg.types.Type.PrimitiveType; @@ -154,7 +153,7 @@ public FieldDeserializer list(ListType listTypeInfo, ObjectInspectorPair pair, F return null; } - List result = new ArrayList<>(); + List result = Lists.newLinkedList(); ListObjectInspector listInspector = (ListObjectInspector) pair.sourceInspector(); for (Object val : listInspector.getList(o)) { @@ -173,7 +172,7 @@ public FieldDeserializer map(MapType mapType, ObjectInspectorPair pair, FieldDes return null; } - Map result = new HashMap<>(); + Map result = Maps.newHashMap(); 
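+      // Maps.newHashMap() is Iceberg's relocated Guava factory; the patch swaps
+      // java.util constructors for these factories throughout, keeping the
+      // modules on the bundled Guava rather than Hive's classpath version.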
MapObjectInspector mapObjectInspector = (MapObjectInspector) pair.sourceInspector(); for (Map.Entry entry : mapObjectInspector.getMap(o).entrySet()) { diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index a6777d30761c..b8c3187b0a0f 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -24,7 +24,6 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.ListIterator; import java.util.Map; @@ -271,7 +270,7 @@ public Map getBasicStatistics(Partish partish) { org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable(); TableDesc tableDesc = Utilities.getTableDesc(hmsTable); Table table = Catalogs.loadTable(conf, tableDesc.getProperties()); - Map stats = new HashMap<>(); + Map stats = Maps.newHashMap(); if (table.currentSnapshot() != null) { Map summary = table.currentSnapshot().summary(); if (summary != null) { diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java index e1eca8f84854..293a7020fe00 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java @@ -20,7 +20,6 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; @@ -49,6 +48,7 @@ import org.apache.iceberg.mapping.NameMapping; import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.mr.Catalogs; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; public class HiveTableUtil { @@ -89,7 +89,7 @@ public static void importFiles(String sourceLocation, dataFiles.forEach(append::appendFile); } else { PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator(); - List> tasks = new ArrayList<>(); + List> tasks = Lists.newArrayList(); while (partitionIterator.hasNext()) { Partition partition = partitionIterator.next(); Callable task = () -> { @@ -121,7 +121,7 @@ public static void importFiles(String sourceLocation, private static List getDataFiles(RemoteIterator fileStatusIterator, Map partitionKeys, String format, PartitionSpec spec, MetricsConfig metricsConfig, NameMapping nameMapping, Configuration conf) throws IOException { - List dataFiles = new ArrayList<>(); + List dataFiles = Lists.newArrayList(); while (fileStatusIterator.hasNext()) { LocatedFileStatus fileStatus = fileStatusIterator.next(); String fileName = fileStatus.getPath().getName(); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInternalRecordWrapper.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInternalRecordWrapper.java index 50e72edc8c1d..0d53924b936b 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInternalRecordWrapper.java +++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInternalRecordWrapper.java @@ -21,13 +21,13 @@ import java.lang.reflect.Array; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.iceberg.StructLike; import org.apache.iceberg.data.Record; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.StructType; @@ -131,7 +131,7 @@ private IcebergInternalRecordWrapper(IcebergInternalRecordWrapper toCopy) { } private Map buildFieldPositionMap(StructType schema) { - Map nameToPosition = new HashMap<>(); + Map nameToPosition = Maps.newHashMap(); List fields = schema.fields(); for (int i = 0; i < fields.size(); i += 1) { nameToPosition.put(fields.get(i).name(), i); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java index 9d7c6420926d..95f03cdade01 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java @@ -20,9 +20,7 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -38,6 +36,8 @@ import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; @@ -102,7 +102,7 @@ public abstract class HiveIcebergStorageHandlerWithEngineBase { @Parameters(name = "fileFormat={0}, engine={1}, catalog={2}, isVectorized={3}") public static Collection parameters() { - Collection testParams = new ArrayList<>(); + Collection testParams = Lists.newArrayList(); String javaVersion = System.getProperty("java.specification.version"); // Run tests with every FileFormat for a single Catalog (HiveCatalog) @@ -159,7 +159,7 @@ public static void beforeClass() { } @AfterClass - public static void afterClass() { + public static void afterClass() throws Exception { shell.stop(); } @@ -190,7 +190,7 @@ public void after() throws Exception { protected void validateBasicStats(Table icebergTable, String dbName, String tableName) throws TException, InterruptedException { Map hmsParams = shell.metastore().getTable(dbName, tableName).getParameters(); - Map summary = new HashMap<>(); + Map summary = Maps.newHashMap(); if (icebergTable.currentSnapshot() == null) { for (String key : STATS_MAPPING.values()) { summary.put(key, "0"); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java index 62fd2adfc5e6..79e3dfee9e95 100644 --- 
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java
index 62fd2adfc5e6..79e3dfee9e95 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java
@@ -33,7 +33,6 @@
 import java.time.OffsetDateTime;
 import java.time.ZoneId;
 import java.time.ZoneOffset;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
@@ -74,6 +73,7 @@
 import org.apache.iceberg.hadoop.HadoopOutputFile;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileAppenderFactory;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.ArrayUtil;
 import org.apache.iceberg.util.ByteBuffers;
@@ -235,7 +235,7 @@ public static void assertEquals(Record expected, Record actual) {
   public static void validateData(Table table, List<Record> expected, int sortBy) throws IOException {
     // Refresh the table, so we get the new data as well
     table.refresh();
-    List<Record> records = new ArrayList<>(expected.size());
+    List<Record> records = Lists.newArrayListWithExpectedSize(expected.size());
     try (CloseableIterable<Record> iterable = IcebergGenerics.read(table).build()) {
       iterable.forEach(records::add);
     }
@@ -251,8 +251,8 @@ public static void validateData(Table table, List<Record> expected, int sortBy)
    * @param sortBy The column position by which we will sort
    */
   public static void validateData(List<Record> expected, List<Record> actual, int sortBy) {
-    List<Record> sortedExpected = new ArrayList<>(expected);
-    List<Record> sortedActual = new ArrayList<>(actual);
+    List<Record> sortedExpected = Lists.newArrayList(expected);
+    List<Record> sortedActual = Lists.newArrayList(actual);
     // Sort based on the specified column
     sortedExpected.sort(Comparator.comparingInt(record -> record.get(sortBy).hashCode()));
     sortedActual.sort(Comparator.comparingInt(record -> record.get(sortBy).hashCode()));
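Note the two distinct replacements in the HiveIcebergTestUtils hunks above: new ArrayList<>(expected.size()) takes an int capacity hint, so it maps to Lists.newArrayListWithExpectedSize(...), while new ArrayList<>(expected) copies a collection and maps to Lists.newArrayList(...). Picking the wrong factory still compiles but changes behavior; a short sketch, with illustrative list contents:

    import java.util.List;
    import org.apache.iceberg.relocated.com.google.common.collect.Lists;

    public class CapacityVsCopy {
      public static void main(String[] args) {
        List<Integer> source = Lists.newArrayList(1, 2, 3);

        // Copy-constructor equivalent: contains 1, 2, 3
        List<Integer> copy = Lists.newArrayList(source);

        // Capacity-hint equivalent: empty, merely pre-sized for 3 elements
        List<Integer> presized = Lists.newArrayListWithExpectedSize(source.size());

        // Pitfall: Lists.newArrayList(3) would be a one-element list
        // containing the Integer 3, not a capacity hint.
        System.out.println(copy.size());     // 3
        System.out.println(presized.size()); // 0
      }
    }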
"Green").build().get(0)); // add one more to 'Green' so we have a partition w/ multiple records shell.executeStatement(testTables.getInsertQuery(expected, target, true)); @@ -160,7 +159,7 @@ public void testInsertOverwritePartitionedTable() throws IOException { .build(); shell.executeStatement(testTables.getInsertQuery(newRecords, target, true)); - expected = new ArrayList<>(newRecords); + expected = Lists.newArrayList(newRecords); expected.add(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)); // existing, untouched partition ('Pink') HiveIcebergTestUtils.validateData(table, expected, 0); @@ -197,7 +196,7 @@ public void testInsertFromSelectWithOrderBy() throws IOException { shell.executeStatement("INSERT INTO customers SELECT * FROM customers ORDER BY customer_id"); // Check that everything is duplicated as expected - List records = new ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + List records = Lists.newArrayList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); records.addAll(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); HiveIcebergTestUtils.validateData(table, records, 0); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergOutputCommitter.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergOutputCommitter.java index 2e3f5aaf75c9..f5a4a4377d74 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergOutputCommitter.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergOutputCommitter.java @@ -20,7 +20,6 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; @@ -48,6 +47,7 @@ import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.mr.mapred.Container; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.SerializationUtil; @@ -259,7 +259,7 @@ private JobConf jobConf(Table table, int taskNum) { */ private List writeRecords(String name, int taskNum, int attemptNum, boolean commitTasks, boolean abortTasks, JobConf conf, OutputCommitter committer) throws IOException { - List expected = new ArrayList<>(RECORD_NUM * taskNum); + List expected = Lists.newArrayListWithExpectedSize(RECORD_NUM * taskNum); Table table = HiveIcebergStorageHandler.table(conf, name); FileIO io = table.io(); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java index fdf939287608..9bd3393efc5f 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.math.BigDecimal; -import java.util.ArrayList; import java.util.Comparator; import java.util.List; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -33,6 +32,7 @@ import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import 
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java
index fdf939287608..9bd3393efc5f 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSchemaEvolution.java
@@ -21,7 +21,6 @@
 import java.io.IOException;
 import java.math.BigDecimal;
-import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -33,6 +32,7 @@
 import org.apache.iceberg.hive.HiveSchemaUtil;
 import org.apache.iceberg.mr.TestHelper;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
 import org.apache.thrift.TException;
 import org.junit.Assert;
@@ -376,7 +376,7 @@ public void testAddColumnIntoStructToIcebergTable() throws IOException {
     icebergTable = testTables.loadTable(TableIdentifier.of("default", "people"));
     testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newPeople);
 
-    List<Record> sortedExpected = new ArrayList<>(people);
+    List<Record> sortedExpected = Lists.newArrayList(people);
     sortedExpected.addAll(newPeople);
     sortedExpected.sort(Comparator.comparingLong(record -> (Long) record.get(0)));
     List<Object[]> rows = shell
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java
index 4dca5875b4a0..a57210054f9e 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStatistics.java
@@ -20,7 +20,6 @@
 package org.apache.iceberg.mr.hive;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.List;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -28,9 +27,10 @@
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.hive.HiveTableOperations;
+import org.apache.iceberg.hadoop.ConfigProperties;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.thrift.TException;
 import org.junit.Assert;
 import org.junit.Assume;
@@ -68,7 +68,7 @@ public void testAnalyzeTableComputeStatisticsEmptyTable() throws IOException, TException, InterruptedException {
     String tableName = "customers";
     Table table = testTables
         .createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
-            new ArrayList<>());
+            Lists.newArrayList());
     shell.executeStatement("ANALYZE TABLE " + dbName + "."
+ tableName + " COMPUTE STATISTICS"); validateBasicStats(table, dbName, tableName); } @@ -196,7 +196,7 @@ public void testStatsRemoved() throws IOException { checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2); // Create a Catalog where the KEEP_HIVE_STATS is false - shell.metastore().hiveConf().set(HiveTableOperations.KEEP_HIVE_STATS, StatsSetupConst.FALSE); + shell.metastore().hiveConf().set(ConfigProperties.KEEP_HIVE_STATS, StatsSetupConst.FALSE); TestTables nonHiveTestTables = HiveIcebergStorageHandlerTestUtils.testTables(shell, testTableType, temp); Table nonHiveTable = nonHiveTestTables.loadTable(identifier); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerLocalScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerLocalScan.java index 218114b74976..fc005ab4bde8 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerLocalScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerLocalScan.java @@ -20,10 +20,8 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.iceberg.FileFormat; @@ -40,6 +38,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; import org.junit.After; import org.junit.AfterClass; @@ -61,7 +60,7 @@ public class TestHiveIcebergStorageHandlerLocalScan { @Parameters(name = "fileFormat={0}, catalog={1}") public static Collection parameters() { - Collection testParams = new ArrayList<>(); + Collection testParams = Lists.newArrayList(); // Run tests with every FileFormat for a single Catalog (HiveCatalog) for (FileFormat fileFormat : HiveIcebergStorageHandlerTestUtils.FILE_FORMATS) { @@ -97,7 +96,7 @@ public static void beforeClass() { } @AfterClass - public static void afterClass() { + public static void afterClass() throws Exception { shell.stop(); } @@ -187,7 +186,7 @@ public void selectSameColumnTwice() throws IOException { @Test public void testCreateTableWithColumnSpecification() throws IOException { TableIdentifier identifier = TableIdentifier.of("default", "customers"); - Map> data = new HashMap<>(1); + Map> data = Maps.newHashMapWithExpectedSize(1); data.put(null, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); String createSql = "CREATE EXTERNAL TABLE " + identifier + " (customer_id BIGINT, first_name STRING COMMENT 'This is first name', " + diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index cabea6da52f9..66657d98b64a 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -22,10 +22,8 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; -import java.util.ArrayList; import 
java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -65,6 +63,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; @@ -124,7 +123,7 @@ public class TestHiveIcebergStorageHandlerNoScan { @Parameters(name = "catalog={0}") public static Collection parameters() { - Collection testParams = new ArrayList<>(); + Collection testParams = Lists.newArrayList(); for (TestTables.TestTableType testTableType : TestTables.ALL_TABLE_TYPES) { testParams.add(new Object[] {testTableType}); } @@ -148,7 +147,7 @@ public static void beforeClass() { } @AfterClass - public static void afterClass() { + public static void afterClass() throws Exception { shell.stop(); } @@ -915,7 +914,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { // Check the Iceberg table parameters org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier); - Map expectedIcebergProperties = new HashMap<>(); + Map expectedIcebergProperties = Maps.newHashMap(); expectedIcebergProperties.put("custom_property", "initial_val"); expectedIcebergProperties.put("EXTERNAL", "TRUE"); expectedIcebergProperties.put("storage_handler", HiveIcebergStorageHandler.class.getName()); @@ -939,7 +938,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { Assert.assertEquals(expectedIcebergProperties, icebergTable.properties()); if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { - Assert.assertEquals(9, hmsParams.size()); + Assert.assertEquals(10, hmsParams.size()); Assert.assertEquals("initial_val", hmsParams.get("custom_property")); Assert.assertEquals("TRUE", hmsParams.get("EXTERNAL")); Assert.assertEquals("true", hmsParams.get(TableProperties.ENGINE_HIVE_ENABLED)); @@ -976,7 +975,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { - Assert.assertEquals(12, hmsParams.size()); // 2 newly-added properties + previous_metadata_location prop + Assert.assertEquals(13, hmsParams.size()); // 2 newly-added properties + previous_metadata_location prop Assert.assertEquals("true", hmsParams.get("new_prop_1")); Assert.assertEquals("false", hmsParams.get("new_prop_2")); Assert.assertEquals("new_val", hmsParams.get("custom_property")); @@ -1382,7 +1381,7 @@ public void testMetaHookWithUndefinedAlterOperationType() throws Exception { org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers"); HiveIcebergMetaHook metaHook = new HiveIcebergMetaHook(shell.getHiveConf()); - EnvironmentContext environmentContext = new EnvironmentContext(new HashMap<>()); + EnvironmentContext environmentContext = new EnvironmentContext(Maps.newHashMap()); metaHook.preAlterTable(hmsTable, environmentContext); metaHook.commitAlterTable(hmsTable, environmentContext); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerTimezone.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerTimezone.java 
index d8b455e452c8..122973fb362a 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerTimezone.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerTimezone.java @@ -92,7 +92,7 @@ public static void beforeClass() { } @AfterClass - public static void afterClass() { + public static void afterClass() throws Exception { shell.stop(); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithMultipleCatalogs.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithMultipleCatalogs.java index f32fb10b8bcd..08f60b04c8e8 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithMultipleCatalogs.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithMultipleCatalogs.java @@ -20,7 +20,6 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.apache.iceberg.AssertHelpers; @@ -31,6 +30,7 @@ import org.apache.iceberg.data.Record; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.mr.InputFormatConfig; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -73,7 +73,7 @@ public class TestHiveIcebergStorageHandlerWithMultipleCatalogs { @Parameterized.Parameters(name = "fileFormat1={0}, fileFormat2={1}, engine={2}, tableType1={3}, catalogName1={4}, " + "tableType2={5}, catalogName2={6}") public static Collection parameters() { - Collection testParams = new ArrayList<>(); + Collection testParams = Lists.newArrayList(); String javaVersion = System.getProperty("java.specification.version"); // Run tests with PARQUET and ORC file formats for a two Catalogs @@ -97,7 +97,7 @@ public static void beforeClass() { } @AfterClass - public static void afterClass() { + public static void afterClass() throws Exception { shell.stop(); } @@ -127,7 +127,7 @@ public void testJoinTablesFromDifferentCatalogs() throws IOException { "FROM default.customers2 c2 JOIN default.customers1 c1 ON c2.customer_id = c1.customer_id " + "ORDER BY c2.customer_id"); Assert.assertEquals(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.size(), rows.size()); - HiveIcebergTestUtils.validateData(new ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), + HiveIcebergTestUtils.validateData(Lists.newArrayList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java index 8538cb1ced62..7f43dc28e748 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java @@ -19,7 +19,6 @@ package org.apache.iceberg.mr.hive; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -38,6 +37,7 @@ import org.apache.iceberg.hive.TestHiveMetastore; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import 
org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; /** * Test class for running HiveQL queries, essentially acting like a Beeline shell in tests. @@ -101,7 +101,7 @@ public void start() { started = true; } - public void stop() { + public void stop() throws Exception { if (client != null) { client.stop(); } @@ -140,7 +140,7 @@ public List executeStatement(String statement) { "You have to start TestHiveShell and open a session first, before running a query."); try { OperationHandle handle = client.executeStatement(session.getSessionHandle(), statement, Collections.emptyMap()); - List resultSet = new ArrayList<>(); + List resultSet = Lists.newArrayList(); if (handle.hasResultSet()) { RowSet rowSet; // keep fetching results until we can diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestIcebergInputFormats.java index 671b5bfcd8a3..ff45cf8521a1 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestIcebergInputFormats.java @@ -26,7 +26,6 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; -import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -69,6 +68,7 @@ import org.apache.iceberg.mr.mapreduce.IcebergSplit; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -195,7 +195,7 @@ public void testResiduals() throws Exception { writeRecords.get(1).set(1, 456L); writeRecords.get(1).set(2, "2020-03-20"); - List expectedRecords = new ArrayList<>(); + List expectedRecords = Lists.newArrayList(); expectedRecords.add(writeRecords.get(0)); DataFile dataFile1 = helper.writeFile(Row.of("2020-03-20", 0), writeRecords); @@ -505,8 +505,8 @@ private static TestMapredIcebergInputFormat create(Configuration conf) { try { org.apache.hadoop.mapred.InputSplit[] splits = inputFormat.getSplits(job, 1); - List iceSplits = new ArrayList<>(splits.length); - List records = new ArrayList<>(); + List iceSplits = Lists.newArrayListWithExpectedSize(splits.length); + List records = Lists.newArrayList(); for (org.apache.hadoop.mapred.InputSplit split : splits) { iceSplits.add((IcebergSplit) split); @@ -542,8 +542,8 @@ private static TestIcebergInputFormat create(Configuration conf) { IcebergInputFormat inputFormat = new IcebergInputFormat<>(); List splits = inputFormat.getSplits(context); - List iceSplits = new ArrayList<>(splits.size()); - List records = new ArrayList<>(); + List iceSplits = Lists.newArrayListWithExpectedSize(splits.size()); + List records = Lists.newArrayList(); for (InputSplit split : splits) { iceSplits.add((IcebergSplit) split); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java index 5d3783df230f..fc0f0ca55814 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java +++ 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java @@ -26,7 +26,6 @@ import java.time.LocalDateTime; import java.time.OffsetDateTime; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -58,6 +57,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.ObjectArrays; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -118,7 +118,7 @@ public String catalogName() { * @return the tables properties string, such as {@code TBLPROPERTIES('iceberg.catalog'='mycatalog')} */ public String propertiesForCreateTableSQL(Map tableProperties) { - Map properties = new HashMap<>(tableProperties); + Map properties = Maps.newHashMap(tableProperties); properties.putIfAbsent(InputFormatConfig.CATALOG_NAME, catalog); String props = properties.entrySet().stream() .map(entry -> String.format("'%s'='%s'", entry.getKey(), entry.getValue())) @@ -415,6 +415,7 @@ public Map properties() { ); } + @Override public String locationForCreateTableSQL(TableIdentifier identifier) { return "LOCATION '" + warehouseLocation + TestTables.tablePath(identifier) + "' "; } @@ -498,23 +499,27 @@ private String getStringValueForInsert(Object value, Type type) { enum TestTableType { HADOOP_TABLE { + @Override public TestTables instance(Configuration conf, TemporaryFolder temporaryFolder, String catalogName) { return new HadoopTestTables(conf, temporaryFolder); } }, HADOOP_CATALOG { + @Override public TestTables instance(Configuration conf, TemporaryFolder temporaryFolder, String catalogName) throws IOException { return new HadoopCatalogTestTables(conf, temporaryFolder, catalogName); } }, CUSTOM_CATALOG { + @Override public TestTables instance(Configuration conf, TemporaryFolder temporaryFolder, String catalogName) throws IOException { return new CustomCatalogTestTables(conf, temporaryFolder, catalogName); } }, HIVE_CATALOG { + @Override public TestTables instance(Configuration conf, TemporaryFolder temporaryFolder, String catalogName) { return new HiveTestTables(conf, temporaryFolder, catalogName); } diff --git a/iceberg/iceberg-handler/src/test/queries/positive/alter_multi_part_table_to_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/alter_multi_part_table_to_iceberg.q index 2a314aba2cd8..633a9e382aaf 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/alter_multi_part_table_to_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/alter_multi_part_table_to_iceberg.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! 
qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/alter_part_table_to_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/alter_part_table_to_iceberg.q index 86205150299d..0d553d51064a 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/alter_part_table_to_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/alter_part_table_to_iceberg.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+/$1#Masked#/ set hive.vectorized.execution.enabled=false; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/alter_table_to_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/alter_table_to_iceberg.q index dc251fce44eb..0788ebd1a550 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/alter_table_to_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/alter_table_to_iceberg.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q index 880b92b3b32a..a96ef17894db 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q index ab1a30955f95..e40218747631 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; DROP TABLE IF EXISTS ice_orc; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q index 7b6f12a6257c..6e0bb1655361 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! 
qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q index 47c1f529b63f..66ddae949679 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG WITH SERDEPROPERTIES('write.format.default'='orc'); diff --git a/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_table.q index 26dd4471bc9b..10c07f523e8c 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_table.q @@ -1,5 +1,7 @@ -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+/$1#Masked#/ DROP TABLE IF EXISTS ice_t; CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/show_create_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/show_create_iceberg_table.q index f9745a26209a..cc777931db99 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/show_create_iceberg_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/show_create_iceberg_table.q @@ -1,3 +1,6 @@ +-- Mask random uuid +--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/ + DROP TABLE IF EXISTS ice_t; CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG; SHOW CREATE TABLE ice_t; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/truncate_force_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/truncate_force_iceberg_table.q index b1c002e9f2fa..e47e8b2751e1 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/truncate_force_iceberg_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/truncate_force_iceberg_table.q @@ -1,6 +1,8 @@ -- SORT_QUERY_RESULTS -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! 
qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/truncate_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/truncate_iceberg_table.q index a52e7914bab1..e4bf0d47f6ee 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/truncate_iceberg_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/truncate_iceberg_table.q @@ -1,6 +1,8 @@ -- SORT_QUERY_RESULTS -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/truncate_partitioned_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/truncate_partitioned_iceberg_table.q index 0bb98c77b9b3..352e1ed05d6d 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/truncate_partitioned_iceberg_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/truncate_partitioned_iceberg_table.q @@ -1,6 +1,8 @@ -- SORT_QUERY_RESULTS -- Mask the totalSize value as it can have slight variability, causing test flakiness --! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out index b5c290e3b56c..fa26ad2a6506 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out @@ -205,6 +205,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -453,6 +454,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default parquet # Storage Information @@ -701,6 +703,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default avro # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out index 0205daa4a611..caab6dfa8291 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out @@ -160,6 +160,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -357,6 +358,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default parquet # Storage Information @@ -554,6 +556,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default avro # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out index 
4ee448b828a7..2910a52311d0 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out @@ -116,6 +116,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -265,6 +266,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default parquet # Storage Information @@ -414,6 +416,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default avro # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out index 722f7d554adf..f540f2d233b2 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out @@ -38,6 +38,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# # Storage Information SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out index 4530cd595e97..f7c2d7024ec6 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out @@ -42,6 +42,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default ORC # Storage Information @@ -104,6 +105,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default PARQUET # Storage Information @@ -166,6 +168,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default AVRO # Storage Information @@ -228,6 +231,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default AVRO # Storage Information @@ -287,6 +291,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default ORC # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out index 722f7d554adf..f540f2d233b2 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out @@ -38,6 +38,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# # Storage Information SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out 
index a993b1514da7..5f7bb9fdca1e 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out @@ -38,6 +38,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out index fd363d45a477..79bbc150a3fe 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out @@ -78,6 +78,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# # Storage Information SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe @@ -132,6 +133,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# # Storage Information SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe @@ -187,6 +189,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# # Storage Information SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe @@ -230,6 +233,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# # Storage Information SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out index 55330cb89ac9..257b5a9c7b9c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out @@ -35,6 +35,7 @@ TBLPROPERTIES ( 'serialization.format'='1', 'table_type'='ICEBERG', #### A masked pattern was here #### + 'uuid'='#Masked#') PREHOOK: query: DROP TABLE IF EXISTS ice_t_transform PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS ice_t_transform @@ -83,6 +84,7 @@ TBLPROPERTIES ( 'serialization.format'='1', 'table_type'='ICEBERG', #### A masked pattern was here #### + 'uuid'='#Masked#') PREHOOK: query: DROP TABLE IF EXISTS ice_t_transform_prop PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS ice_t_transform_prop @@ -132,6 +134,7 @@ TBLPROPERTIES ( 'serialization.format'='1', 'table_type'='ICEBERG', #### A masked pattern was here #### + 'uuid'='#Masked#') PREHOOK: query: DROP TABLE IF EXISTS ice_t_identity_part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS ice_t_identity_part @@ -169,3 +172,4 @@ TBLPROPERTIES ( 'serialization.format'='1', 'table_type'='ICEBERG', #### A masked pattern was here #### + 'uuid'='#Masked#') diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out index b51e1b855cc5..764aae5f8f3c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out @@ -101,6 +101,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A 
masked pattern was here #### + uuid #Masked# write.format.default parquet # Storage Information @@ -167,6 +168,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default parquet # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out index ce8514869904..d4c95a2fa885 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out @@ -101,6 +101,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -167,6 +168,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -231,6 +233,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -297,6 +300,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information @@ -379,6 +383,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default orc # Storage Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out index 2c8dbab40663..a3baf702afba 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out @@ -113,6 +113,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default avro # Storage Information @@ -206,6 +207,7 @@ Table Parameters: table_type ICEBERG totalSize #Masked# #### A masked pattern was here #### + uuid #Masked# write.format.default avro # Storage Information
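The recurring q-file directive --! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ seen above is a sed-style substitution that the q-test driver applies to query output before diffing it against the .q.out golden file. Since every Iceberg table in this patch now surfaces a randomly generated uuid table parameter, its value has to be masked the same way the variable totalSize already is, which is exactly why each golden file gains a "uuid #Masked#" row. A small Java sketch of the equivalent substitution (the input line is illustrative; Java regexes use the same $1/$2 group references as the directive):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class UuidMasker {
      public static void main(String[] args) {
        String line = "\tuuid               \t7a6e2f10-1c9b-4d8a-9e55-0123456789ab";
        // Same pattern as the qt:replace directive: keep the label and the
        // surrounding whitespace (groups 1 and 2), mask the value itself.
        Pattern p = Pattern.compile("(\\s+uuid\\s+)\\S+(\\s*)");
        Matcher m = p.matcher(line);
        System.out.println(m.replaceAll("$1#Masked#$2"));
        // prints: "\tuuid               \t#Masked#"
      }
    }

Masking rather than deleting the row keeps the golden files sensitive to the presence of the parameter while staying stable across runs.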