diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 21fb270f3663..740dcea4201b 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -1867,6 +1867,41 @@ private void verifyAlterTableAddColumnsTests() throws Exception { Assert.assertEquals(expectedSchema, hmsSchema); } + @Test + public void checkIcebergTableLocation() throws TException, InterruptedException, IOException { + Assume.assumeTrue("This test is only for hive catalog", testTableType == TestTables.TestTableType.HIVE_CATALOG); + + String dBName = "testdb"; + String tableName = "tbl"; + String dbWithSuffix = "/" + dBName + ".db"; + String dbManagedLocation = shell.getHiveConf().get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname) + dbWithSuffix; + String dbExternalLocation = shell.getHiveConf().get(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname) + + dbWithSuffix; + Path noExistedTblPath = new Path(dbManagedLocation + "/" + tableName); + Path expectedTblPath = new Path(dbExternalLocation + "/" + tableName); + + // Create a database with default external location and managed location. + shell.executeStatement("CREATE DATABASE " + dBName); + + // Create a iceberg table without external keyword, and its location should on database external location. + shell.executeStatement("CREATE TABLE " + dBName + "." + tableName + " (id int) STORED BY ICEBERG"); + + // table location whose parent path is managed database location should not exist. + Assert.assertFalse(noExistedTblPath.getFileSystem(shell.getHiveConf()).exists(noExistedTblPath)); + + // Check the iceberg table location, whose parent path should be database external location. + org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable(dBName, tableName); + org.apache.iceberg.Table iceTable = testTables.loadTable(TableIdentifier.of(dBName, tableName)); + Path hmsTblLocation = new Path(hmsTable.getSd().getLocation()); + Assert.assertTrue(hmsTblLocation.getFileSystem(shell.getHiveConf()).exists(hmsTblLocation)); + Assert.assertTrue(expectedTblPath.toString().equalsIgnoreCase(hmsTblLocation.toString())); + Assert.assertTrue(expectedTblPath.toString().equalsIgnoreCase(iceTable.location())); + + shell.executeStatement("DROP TABLE " + dBName + "." + tableName); + // external table location should still exist if table is dropped as external.table.purge is default false. + Assert.assertTrue(hmsTblLocation.getFileSystem(shell.getHiveConf()).exists(hmsTblLocation)); + } + private String getCurrentSnapshotForHiveCatalogTable(org.apache.iceberg.Table icebergTable) { return ((BaseMetastoreTableOperations) ((BaseTable) icebergTable).operations()).currentMetadataLocation(); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java index 9d17d85ca772..dbb92305e2dc 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java @@ -87,6 +87,8 @@ public void start() { hs2Conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS)); hs2Conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE)); + hs2Conf.setVar(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL, + metastore.hiveConf().getVar(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL)); // Initializing RpcMetrics in a single JVM multiple times can cause issues DefaultMetricsSystem.setMiniClusterMode(true); diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out index 8c905528e79d..13544513cb33 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out @@ -499,9 +499,10 @@ Database: default #### A masked pattern was here #### Retention: 0 #### A masked pattern was here #### -Table Type: MANAGED_TABLE +Table Type: EXTERNAL_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} + EXTERNAL TRUE bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]} engine.hive.enabled true @@ -542,9 +543,10 @@ Database: default #### A masked pattern was here #### Retention: 0 #### A masked pattern was here #### -Table Type: MANAGED_TABLE +Table Type: EXTERNAL_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}} + EXTERNAL TRUE bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]} engine.hive.enabled true diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out index 447a4b27f829..e853cfc6bd4c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out @@ -110,7 +110,7 @@ PREHOOK: Input: default@emp_iceberg POSTHOOK: query: show create table emp_iceberg POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@emp_iceberg -CREATE TABLE `emp_iceberg`( +CREATE EXTERNAL TABLE `emp_iceberg`( `id` int, `company` string) PARTITIONED BY SPEC ( diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out index 056af234068d..d2ce3cc12516 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out @@ -106,9 +106,10 @@ Database: default #### A masked pattern was here #### Retention: 0 #### A masked pattern was here #### -Table Type: MANAGED_TABLE +Table Type: EXTERNAL_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"key\",\"required\":false,\"type\":\"int\"}]} current-snapshot-id #Masked# diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out index 10d183f43d8c..bbe37b3da2d9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out @@ -14,7 +14,7 @@ PREHOOK: Input: default@ice01 POSTHOOK: query: show create table ice01 POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@ice01 -CREATE TABLE `ice01`( +CREATE EXTERNAL TABLE `ice01`( `id` int) ROW FORMAT SERDE 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' @@ -122,7 +122,7 @@ PREHOOK: Input: default@ice01 POSTHOOK: query: show create table ice01 POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@ice01 -CREATE TABLE `ice01`( +CREATE EXTERNAL TABLE `ice01`( `id` int) ROW FORMAT SERDE 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' @@ -265,7 +265,7 @@ PREHOOK: Input: default@icepart01 POSTHOOK: query: show create table icepart01 POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@icepart01 -CREATE TABLE `icepart01`( +CREATE EXTERNAL TABLE `icepart01`( `id` int, `part` int) PARTITIONED BY SPEC ( diff --git a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out index d24cbaf5e2dc..abb26c994641 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out @@ -144,9 +144,10 @@ Database: default #### A masked pattern was here #### Retention: 0 #### A masked pattern was here #### -Table Type: MANAGED_TABLE +Table Type: EXTERNAL_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} + EXTERNAL TRUE bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]} engine.hive.enabled true @@ -187,9 +188,10 @@ Database: default #### A masked pattern was here #### Retention: 0 #### A masked pattern was here #### -Table Type: MANAGED_TABLE +Table Type: EXTERNAL_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}} + EXTERNAL TRUE bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]} engine.hive.enabled true diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index e013db7db266..307373af8a65 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -33,6 +33,7 @@ import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_TIME_WINDOW; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_LOAD_DYNAMIC_PARTITIONS_SCAN_SPECIFIC_PARTITIONS; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_WRITE_NOTIFICATION_MAX_BATCH_SIZE; +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.CTAS_LEGACY_CONFIG; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.convertToGetPartitionsByNamesRequest; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; @@ -1351,9 +1352,14 @@ public void createTable(Table tbl, boolean ifNotExists, CreateTableRequest request = new CreateTableRequest(tTbl); - if (isIcebergTable(tbl) && isIcebergStatsSource(conf)) { + if (isIcebergTable(tbl)) { EnvironmentContext envContext = new EnvironmentContext(); - envContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + if (TableType.MANAGED_TABLE.equals(tbl.getTableType())) { + envContext.putToProperties(CTAS_LEGACY_CONFIG, Boolean.TRUE.toString()); + } + if (isIcebergStatsSource(conf)) { + envContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + } request.setEnvContext(envContext); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java index a8f888f3a40d..39790b1d5a96 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java @@ -101,6 +101,7 @@ import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DATABASE_PROPERTY; import static org.apache.hadoop.hive.metastore.HiveMetaStoreClient.RENAME_PARTITION_MAKE_COPY; import static org.apache.hadoop.hive.metastore.HiveMetaStoreClient.TRUNCATE_SKIP_DATA_DELETION; +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.CTAS_LEGACY_CONFIG; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; import static org.apache.hadoop.hive.metastore.ExceptionHandler.handleException; import static org.apache.hadoop.hive.metastore.ExceptionHandler.newMetaException; @@ -2256,9 +2257,15 @@ private void create_table_core(final RawStore ms, final CreateTableRequest req) tbl = transformer.transformCreateTable(tbl, processorCapabilities, processorId); } - if (tbl.getParameters() != null) { - tbl.getParameters().remove(TABLE_IS_CTAS); - tbl.getParameters().remove(TABLE_IS_CTLT); + Map params = tbl.getParameters(); + if (params != null) { + params.remove(TABLE_IS_CTAS); + params.remove(TABLE_IS_CTLT); + if (MetaStoreServerUtils.getBooleanEnvProp(envContext, CTAS_LEGACY_CONFIG) && + TableType.MANAGED_TABLE.toString().equals(tbl.getTableType())) { + params.put("EXTERNAL", "TRUE"); + tbl.setTableType(TableType.EXTERNAL_TABLE.toString()); + } } // If the given table has column statistics, save it here. We will update it later. diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java index 5907efbc18f7..f4afe2aa65d7 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java @@ -548,16 +548,16 @@ public static void updateTableStatsForCreateTable(Warehouse wh, Database db, Tab } if (MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.STATS_AUTO_GATHER) && - !isDoNotUpdateStats(envContext)) { + !getBooleanEnvProp(envContext, StatsSetupConst.DO_NOT_UPDATE_STATS)) { LOG.debug("Calling updateTableStatsSlow for table {}.{}.{}", tbl.getCatName(), tbl.getDbName(), tbl.getTableName()); updateTableStatsSlow(db, tbl, wh, newDir, false, envContext); } } - private static boolean isDoNotUpdateStats(EnvironmentContext envContext) { + public static boolean getBooleanEnvProp(EnvironmentContext envContext, String key) { return Optional.ofNullable(envContext) .map(EnvironmentContext::getProperties) - .map(props -> props.getOrDefault(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.FALSE)) + .map(props -> props.getOrDefault(key, StatsSetupConst.FALSE)) .map(Boolean::parseBoolean) .orElse(false); }