Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,41 @@ private void verifyAlterTableAddColumnsTests() throws Exception {
Assert.assertEquals(expectedSchema, hmsSchema);
}

/**
 * Checks where a Hive-catalog Iceberg table is placed when it is created without the EXTERNAL keyword
 * in a database that was created without any explicit location.
 *
 * <p>The test expects that:
 * <ul>
 *   <li>no table directory is created under the warehouse (managed) database directory,</li>
 *   <li>the location stored in HMS and the location reported by Iceberg both point to the table directory
 *       under the external warehouse database directory (compared case-insensitively),</li>
 *   <li>that directory still exists after the table has been dropped.</li>
 * </ul>
 *
 * <p>The test is skipped for every catalog type other than {@code HIVE_CATALOG}.
 */
@Test
public void checkIcebergTableLocation() throws TException, InterruptedException, IOException {
  Assume.assumeTrue("This test is only for hive catalog", testTableType == TestTables.TestTableType.HIVE_CATALOG);

  HiveConf conf = shell.getHiveConf();
  String dBName = "testdb";
  String tableName = "tbl";
  String qualifiedName = dBName + "." + tableName;
  String dbWithSuffix = "/" + dBName + ".db";
  String dbManagedLocation = conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname) + dbWithSuffix;
  String dbExternalLocation = conf.get(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname) + dbWithSuffix;
  Path managedTblPath = new Path(dbManagedLocation + "/" + tableName);
  Path expectedTblPath = new Path(dbExternalLocation + "/" + tableName);

  // Create a database that uses the default managed and external locations.
  shell.executeStatement("CREATE DATABASE " + dBName);

  // Create an Iceberg table without the EXTERNAL keyword; it is expected to land in the external location.
  shell.executeStatement("CREATE TABLE " + qualifiedName + " (id int) STORED BY ICEBERG");

  // Nothing must have been created for the table under the managed database location.
  Assert.assertFalse("Unexpected table directory under the managed database location: " + managedTblPath,
      managedTblPath.getFileSystem(conf).exists(managedTblPath));

  // Both HMS and Iceberg must report the table directory under the external database location.
  org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable(dBName, tableName);
  org.apache.iceberg.Table iceTable = testTables.loadTable(TableIdentifier.of(dBName, tableName));
  Path hmsTblLocation = new Path(hmsTable.getSd().getLocation());
  Assert.assertTrue("Table location registered in HMS does not exist: " + hmsTblLocation,
      hmsTblLocation.getFileSystem(conf).exists(hmsTblLocation));
  Assert.assertTrue("HMS table location is " + hmsTblLocation + " but expected " + expectedTblPath,
      expectedTblPath.toString().equalsIgnoreCase(hmsTblLocation.toString()));
  Assert.assertTrue("Iceberg table location is " + iceTable.location() + " but expected " + expectedTblPath,
      expectedTblPath.toString().equalsIgnoreCase(iceTable.location()));

  shell.executeStatement("DROP TABLE " + qualifiedName);
  // The table was created without setting external.table.purge, so the test expects the directory of the
  // dropped table to be left in place.
  Assert.assertTrue("Table directory should survive DROP TABLE: " + hmsTblLocation,
      hmsTblLocation.getFileSystem(conf).exists(hmsTblLocation));
}

/**
 * Returns the current metadata location reported by the operations object of the given table.
 *
 * <p>The table is cast to {@link BaseTable} and its operations to {@link BaseMetastoreTableOperations};
 * a table of any other implementation makes this method fail with a {@link ClassCastException}.
 *
 * @param icebergTable the Iceberg table to inspect
 * @return the value of {@code currentMetadataLocation()} for the table's operations
 */
private String getCurrentSnapshotForHiveCatalogTable(org.apache.iceberg.Table icebergTable) {
  BaseTable baseTable = (BaseTable) icebergTable;
  BaseMetastoreTableOperations metastoreOps = (BaseMetastoreTableOperations) baseTable.operations();
  return metastoreOps.currentMetadataLocation();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ public void start() {
hs2Conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS));
hs2Conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE,
metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE));
hs2Conf.setVar(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL,
metastore.hiveConf().getVar(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL));

// Initializing RpcMetrics in a single JVM multiple times can cause issues
DefaultMetricsSystem.setMiniClusterMode(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,10 @@ Database: default
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]}
engine.hive.enabled true
Expand Down Expand Up @@ -542,9 +543,10 @@ Database: default
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]}
engine.hive.enabled true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ PREHOOK: Input: default@emp_iceberg
POSTHOOK: query: show create table emp_iceberg
POSTHOOK: type: SHOW_CREATETABLE
POSTHOOK: Input: default@emp_iceberg
CREATE TABLE `emp_iceberg`(
CREATE EXTERNAL TABLE `emp_iceberg`(
`id` int,
`company` string)
PARTITIONED BY SPEC (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,10 @@ Database: default
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
EXTERNAL TRUE
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"key\",\"required\":false,\"type\":\"int\"}]}
current-snapshot-id #Masked#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ PREHOOK: Input: default@ice01
POSTHOOK: query: show create table ice01
POSTHOOK: type: SHOW_CREATETABLE
POSTHOOK: Input: default@ice01
CREATE TABLE `ice01`(
CREATE EXTERNAL TABLE `ice01`(
`id` int)
ROW FORMAT SERDE
'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
Expand Down Expand Up @@ -122,7 +122,7 @@ PREHOOK: Input: default@ice01
POSTHOOK: query: show create table ice01
POSTHOOK: type: SHOW_CREATETABLE
POSTHOOK: Input: default@ice01
CREATE TABLE `ice01`(
CREATE EXTERNAL TABLE `ice01`(
`id` int)
ROW FORMAT SERDE
'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
Expand Down Expand Up @@ -265,7 +265,7 @@ PREHOOK: Input: default@icepart01
POSTHOOK: query: show create table icepart01
POSTHOOK: type: SHOW_CREATETABLE
POSTHOOK: Input: default@icepart01
CREATE TABLE `icepart01`(
CREATE EXTERNAL TABLE `icepart01`(
`id` int,
`part` int)
PARTITIONED BY SPEC (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,10 @@ Database: default
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]}
engine.hive.enabled true
Expand Down Expand Up @@ -187,9 +188,10 @@ Database: default
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]}
engine.hive.enabled true
Expand Down
10 changes: 8 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_TIME_WINDOW;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_LOAD_DYNAMIC_PARTITIONS_SCAN_SPECIFIC_PARTITIONS;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_WRITE_NOTIFICATION_MAX_BATCH_SIZE;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.CTAS_LEGACY_CONFIG;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.convertToGetPartitionsByNamesRequest;
import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
Expand Down Expand Up @@ -1351,9 +1352,14 @@ public void createTable(Table tbl, boolean ifNotExists,

CreateTableRequest request = new CreateTableRequest(tTbl);

if (isIcebergTable(tbl) && isIcebergStatsSource(conf)) {
if (isIcebergTable(tbl)) {
EnvironmentContext envContext = new EnvironmentContext();
envContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
if (TableType.MANAGED_TABLE.equals(tbl.getTableType())) {
envContext.putToProperties(CTAS_LEGACY_CONFIG, Boolean.TRUE.toString());
}
if (isIcebergStatsSource(conf)) {
envContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
}
request.setEnvContext(envContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DATABASE_PROPERTY;
import static org.apache.hadoop.hive.metastore.HiveMetaStoreClient.RENAME_PARTITION_MAKE_COPY;
import static org.apache.hadoop.hive.metastore.HiveMetaStoreClient.TRUNCATE_SKIP_DATA_DELETION;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.CTAS_LEGACY_CONFIG;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
import static org.apache.hadoop.hive.metastore.ExceptionHandler.handleException;
import static org.apache.hadoop.hive.metastore.ExceptionHandler.newMetaException;
Expand Down Expand Up @@ -2256,9 +2257,15 @@ private void create_table_core(final RawStore ms, final CreateTableRequest req)
tbl = transformer.transformCreateTable(tbl, processorCapabilities, processorId);
}

if (tbl.getParameters() != null) {
tbl.getParameters().remove(TABLE_IS_CTAS);
tbl.getParameters().remove(TABLE_IS_CTLT);
Map<String, String> params = tbl.getParameters();
if (params != null) {
params.remove(TABLE_IS_CTAS);
params.remove(TABLE_IS_CTLT);
if (MetaStoreServerUtils.getBooleanEnvProp(envContext, CTAS_LEGACY_CONFIG) &&
TableType.MANAGED_TABLE.toString().equals(tbl.getTableType())) {
params.put("EXTERNAL", "TRUE");
tbl.setTableType(TableType.EXTERNAL_TABLE.toString());
}
}

// If the given table has column statistics, save it here. We will update it later.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -548,16 +548,16 @@ public static void updateTableStatsForCreateTable(Warehouse wh, Database db, Tab
}

if (MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.STATS_AUTO_GATHER) &&
!isDoNotUpdateStats(envContext)) {
!getBooleanEnvProp(envContext, StatsSetupConst.DO_NOT_UPDATE_STATS)) {
LOG.debug("Calling updateTableStatsSlow for table {}.{}.{}", tbl.getCatName(), tbl.getDbName(), tbl.getTableName());
updateTableStatsSlow(db, tbl, wh, newDir, false, envContext);
}
}

private static boolean isDoNotUpdateStats(EnvironmentContext envContext) {
public static boolean getBooleanEnvProp(EnvironmentContext envContext, String key) {
return Optional.ofNullable(envContext)
.map(EnvironmentContext::getProperties)
.map(props -> props.getOrDefault(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.FALSE))
.map(props -> props.getOrDefault(key, StatsSetupConst.FALSE))
.map(Boolean::parseBoolean)
.orElse(false);
}
Expand Down