From 5b66126bac59cd3d33ea4d5c143fa371238b1262 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Mon, 15 Oct 2018 10:39:05 +0530 Subject: [PATCH 1/3] HIVE-20708: Load an external table as an external table on target with the same location as on the source Dump an external table as an external table. When loading an external table set the location of the target table same as the location of source, but relative to the file system of the target location. IOW, the scheme, authority of the target location is same as the target file system but the path relative to the file system is same as the source. --- ...stReplicationScenariosAcrossInstances.java | 51 ++++++++++++++- .../hive/ql/parse/WarehouseInstance.java | 65 +++++++++++++++++++ .../repl/bootstrap/load/table/LoadTable.java | 9 ++- .../hive/ql/parse/ImportSemanticAnalyzer.java | 7 +- .../parse/repl/dump/io/TableSerializer.java | 19 ------ .../hadoop/hive/ql/plan/ImportTableDesc.java | 7 +- 6 files changed, 134 insertions(+), 24 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 7e8caf0114d4..43428ac38b73 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -1523,6 +1523,9 @@ public void testDumpExternalTableSetFalse() throws Throwable { @Test public void testDumpExternalTableSetTrue() throws Throwable { + String extTabName1 = "et1"; + String extTabLoc1 = primary.createExternalLoc(primaryDbName + "_" + extTabName1); + WarehouseInstance.Tuple tuple = primary .run("use " + primaryDbName) .run("create external table t1 (id int)") @@ -1532,6 +1535,9 @@ public void testDumpExternalTableSetTrue() throws Throwable { .run("insert into table t2 
partition(country='india') values ('bangalore')") .run("insert into table t2 partition(country='us') values ('austin')") .run("insert into table t2 partition(country='france') values ('paris')") + .run("create external table " + extTabName1 + " (id int) location '" + extTabLoc1 + "'") + .run("insert into " + extTabName1 + " values (3)") + .verifyExternalTable(primaryDbName, extTabName1, extTabLoc1) .dump("repl dump " + primaryDbName + " with ('hive.repl.include.external.tables'='true')"); replica.load(replicatedDbName, tuple.dumpLocation) @@ -1540,17 +1546,33 @@ public void testDumpExternalTableSetTrue() throws Throwable { .verifyResult("t1") .run("show tables like 't2'") .verifyResult("t2") + .run("show tables like '" + extTabName1 + "'") + .verifyResult(extTabName1) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("select country from t2 where country = 'us'") .verifyResult("us") .run("select country from t2 where country = 'france'") - .verifyResult("france"); + .verifyResult("france") + .run("select id from " + extTabName1) + .verifyResult("3") + .verifyExternalTable(replicatedDbName, "t1", + primary.getTableLocation(primaryDbName, "t1")) + .verifyExternalTable(replicatedDbName, "t2", + primary.getTableLocation(primaryDbName, "t2")) + .verifyExternalTable(replicatedDbName, extTabName1, + primary.getTableLocation(primaryDbName, extTabName1)); + + String extTabName2 = "et2"; + String extTabLoc2 = primary.createExternalLoc(primaryDbName + "_" + extTabName2); tuple = primary.run("use " + primaryDbName) .run("create external table t3 (id int)") .run("insert into table t3 values (10)") .run("create external table t4 as select id from t3") + .run("create external table " + extTabName2 + " (id int) location '" + extTabLoc2 + "'") + .verifyExternalTable(primaryDbName, extTabName2, extTabLoc2) + .run("insert into " + extTabName2 + " values (5)") .dump("repl dump " + primaryDbName + " from " + tuple.lastReplicationId + " with 
('hive.repl.include.external.tables'='true')"); @@ -1561,7 +1583,32 @@ public void testDumpExternalTableSetTrue() throws Throwable { .run("select id from t3") .verifyResult("10") .run("select id from t4") - .verifyResult(null); // Returns null as create table event doesn't list files + .verifyResult("10") + .run("select id from " + extTabName2) + .verifyResult("5") + .verifyExternalTable(replicatedDbName, "t3", + primary.getTableLocation(primaryDbName,"t3")) + .verifyExternalTable(replicatedDbName, "t4", + primary.getTableLocation(primaryDbName,"t4")) + .verifyExternalTable(replicatedDbName, extTabName2, + primary.getTableLocation(primaryDbName,extTabName2)); + + // Insert a row in the external table on primary and it should show up on replica as well + // since both of them share the same file system and the external tables on both point to the + // same location. + primary.run("use " + primaryDbName) + .run("insert into " + extTabName1 + " values (4)"); + replica.run("use " + replicatedDbName) + .run("select id from " + extTabName1 + " where id = 4") + .verifyResult("4"); + primary.run("use " + primaryDbName) + .run("insert into " + extTabName2 + " values (6)"); + replica.run("use " + replicatedDbName) + .run("select id from " + extTabName2 + " where id = 6") + .verifyResult("6"); + + primary.deleteExternalLoc(extTabLoc1); + primary.deleteExternalLoc(extTabLoc2); } @Test diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index 7900779e7a56..a23e3646c964 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.MetaStoreTestUtils; +import 
org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest; @@ -79,6 +80,7 @@ public class WarehouseInstance implements Closeable { MiniDFSCluster miniDFSCluster; private HiveMetaStoreClient client; public final Path warehouseRoot; + private String extLocRoot; private static int uniqueIdentifier = 0; @@ -98,6 +100,14 @@ public class WarehouseInstance implements Closeable { } Path cmRootPath = mkDir(fs, "/cmroot" + uniqueIdentifier); this.functionsRoot = mkDir(fs, "/functions" + uniqueIdentifier).toString(); + + // Create location for external table data, if required. We create the location on primary + // and it may get replicated to the replica. In case primary and replica are on the same file + // system the location will be shared between them, which is what is expected in case of + // external tables. + extLocRoot = "/external" + uniqueIdentifier; + mkDir(fs, extLocRoot); + initialize(cmRootPath.toString(), warehouseRoot.toString(), overridesForHiveConf); } @@ -171,6 +181,34 @@ private Path mkDir(DistributedFileSystem fs, String pathString) return PathBuilder.fullyQualifiedHDFSUri(path, fs); } + /** + * Create location pointed by the given path with respect to the external table location root on + * DFS. + * @param location + * @return the path string to the location created with respect to the DFS + * @throws IOException + * @throws SemanticException + */ + public String createExternalLoc(String location) throws IOException, SemanticException { + String extLoc = extLocRoot + "/" + location; + DistributedFileSystem fs = miniDFSCluster.getFileSystem(); + mkDir(fs, extLoc); + return extLoc; + } + + /** + * Delete location pointed by the given path with respect to the external table location root on + * DFS. 
+ * @param location + * @throws IOException + * @throws SemanticException + */ + public void deleteExternalLoc(String location) throws IOException, SemanticException { + String extLoc = extLocRoot + "/" + location; + DistributedFileSystem fs = miniDFSCluster.getFileSystem(); + fs.delete(new Path(extLoc), true); + } + public HiveConf getConf() { return hiveConf; } @@ -348,6 +386,19 @@ WarehouseInstance verifyResults(List data) throws IOException { return this; } + /** + * Verify that the given table is an external table with the given location + * @return this + * @throws IOException, Exception + */ + WarehouseInstance verifyExternalTable(String dbName, String tabName, String expectedLoc) throws IOException, + Exception { + Table table = getTable(dbName, tabName); + assertEquals(TableType.EXTERNAL_TABLE.toString(), table.getTableType()); + assertEquals(expectedLoc, getTableLocation(dbName, tabName)); + return this; + } + public List getOutput() throws IOException { List results = new ArrayList<>(); driver.getResults(results); @@ -380,6 +431,20 @@ public Table getTable(String dbName, String tableName) throws Exception { } } + /** + * + * @param dbName + * @param tableName + * @return the path of the given table relative to the file system of the warehouse.
+ * @throws Exception + */ + public String getTableLocation(String dbName, String tableName) throws Exception { + Table table = client.getTable(dbName, tableName); + String location = table.getSd().getLocation(); + URI tabLocURI = (new Path(location)).toUri(); + return tabLocURI.getPath().toString(); + } + public List getAllPartitions(String dbName, String tableName) throws Exception { try { return client.listPartitions(dbName, tableName, Short.MAX_VALUE); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index 8538463cc6a4..215b8096df6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.table; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -206,7 +207,13 @@ private void newTableTasks(ImportTableDesc tblDesc, Task tblRootTask) throws private String location(ImportTableDesc tblDesc, Database parentDb) throws MetaException, SemanticException { - if (!tableContext.waitOnPrecursor()) { + + if (tblDesc.tableType().equals(TableType.EXTERNAL_TABLE)) { + // For an external table, we need to use the path specified in the source table but the + // scheme, authority and root path of the target file system. 
+ return context.warehouse.getDnsPath(tblDesc.getTableLocationPath()).toString(); + } + else if (!tableContext.waitOnPrecursor()) { return context.warehouse.getDefaultTablePath( parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString(); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 16ce5d562db9..2feb5a84d876 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -1060,7 +1060,12 @@ private static void createReplImportTasks( } if (tblDesc.getLocation() == null) { - if (!waitOnPrecursor){ + if (tblDesc.tableType().equals(TableType.EXTERNAL_TABLE)) { + // For an external table, we need to use the path specified in the source table but the + // scheme, authority and root path of the target file system. + tblDesc.setLocation(wh.getDnsPath(tblDesc.getTableLocationPath()).toString()); + } + else if (!waitOnPrecursor){ tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString()); } else { tblDesc.setLocation( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java index f05c23114aad..da17454ad674 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java @@ -83,14 +83,6 @@ private Table updatePropertiesInTable(Table table, ReplicationSpec additionalPro ReplicationSpec.KEY.CURR_STATE_ID.toString(), additionalPropertiesProvider.getCurrentReplicationState()); } - if (isExternalTable(table)) { - // Replication destination will not be external - override if set - table.putToParameters("EXTERNAL", "FALSE"); - } - if (isExternalTableType(table)) { - // 
Replication dest will not be external - override if set - table.setTableType(TableType.MANAGED_TABLE.toString()); - } } else { // ReplicationSpec.KEY scopeKey = ReplicationSpec.KEY.REPL_SCOPE; // write(out, ",\""+ scopeKey.toString() +"\":\"" + replicationSpec.get(scopeKey) + "\""); @@ -101,17 +93,6 @@ private Table updatePropertiesInTable(Table table, ReplicationSpec additionalPro return table; } - private boolean isExternalTableType(org.apache.hadoop.hive.metastore.api.Table table) { - return table.isSetTableType() - && table.getTableType().equalsIgnoreCase(TableType.EXTERNAL_TABLE.toString()); - } - - private boolean isExternalTable(org.apache.hadoop.hive.metastore.api.Table table) { - Map params = table.getParameters(); - return params.containsKey("EXTERNAL") - && params.get("EXTERNAL").equalsIgnoreCase("TRUE"); - } - private void writePartitions(JsonWriter writer, ReplicationSpec additionalPropertiesProvider) throws SemanticException, IOException { writer.jsonGenerator.writeStartArray(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java index 50b43bad3136..9dc9aa739035 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java @@ -24,6 +24,7 @@ import java.util.Map; import com.google.common.collect.ImmutableSet; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -346,7 +347,7 @@ public TableType tableType() { } else if (isMaterializedView()) { return TableType.MATERIALIZED_VIEW; } - return TableType.MANAGED_TABLE; + return table.getTableType(); } public Table toTable(HiveConf conf) throws Exception { @@ -365,4 +366,8 @@ public void setReplWriteId(Long replWriteId) { this.createTblDesc.setReplWriteId(replWriteId); } } + + public Path 
getTableLocationPath() { + return table.getPath(); + } } From 678308ea34e96d3496e86b45439742df0d71b93f Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Fri, 19 Oct 2018 21:44:01 +0530 Subject: [PATCH 2/3] HIVE-20708: Address Hive-QA checkstyle and whitespace comments. Also fix the testcase failures. --- .../TestReplicationScenariosAcrossInstances.java | 13 +++++++++---- .../hadoop/hive/ql/parse/WarehouseInstance.java | 2 +- .../exec/repl/bootstrap/load/table/LoadTable.java | 4 ++-- .../hive/ql/parse/ImportSemanticAnalyzer.java | 3 +-- .../hive/ql/parse/repl/dump/io/TableSerializer.java | 1 - .../results/clientpositive/repl_2_exim_basic.q.out | 3 +-- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 43428ac38b73..dbc5d56fc577 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -1517,8 +1517,13 @@ public void testDumpExternalTableSetFalse() throws Throwable { .run("use " + replicatedDbName) .run("show tables like 't3'") .verifyResult("t3") + .verifyExternalTable(replicatedDbName, "t3", + primary.getTableLocation(primaryDbName, "t3")) .run("select id from t3 where id = 10") - .verifyFailure(new String[] {"10"}); + // Since both the external tables point to the same location as they share the same + // file system, the data in external table on the primary is also visible through the + // external table on the replica. 
+ .verifyResult("10"); } @Test @@ -1587,11 +1592,11 @@ public void testDumpExternalTableSetTrue() throws Throwable { .run("select id from " + extTabName2) .verifyResult("5") .verifyExternalTable(replicatedDbName, "t3", - primary.getTableLocation(primaryDbName,"t3")) + primary.getTableLocation(primaryDbName, "t3")) .verifyExternalTable(replicatedDbName, "t4", - primary.getTableLocation(primaryDbName,"t4")) + primary.getTableLocation(primaryDbName, "t4")) .verifyExternalTable(replicatedDbName, extTabName2, - primary.getTableLocation(primaryDbName,extTabName2)); + primary.getTableLocation(primaryDbName, extTabName2)); // Insert a row in the external table on primary and it should show up on replica as well // since both of them share the same file system and the external tables on both point to the diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index a23e3646c964..cbe60331bacb 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -387,7 +387,7 @@ WarehouseInstance verifyResults(List data) throws IOException { } /** - * Verify that the given table is an external table with the given location + * Verify that the given table is an external table with the given location. 
* @return this * @throws IOException, Exception */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index 215b8096df6b..33945749f61b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -208,12 +208,12 @@ private void newTableTasks(ImportTableDesc tblDesc, Task tblRootTask) throws private String location(ImportTableDesc tblDesc, Database parentDb) throws MetaException, SemanticException { + if (tblDesc.tableType().equals(TableType.EXTERNAL_TABLE)) { // For an external table, we need to use the path specified in the source table but the // scheme, authority and root path of the target file system. return context.warehouse.getDnsPath(tblDesc.getTableLocationPath()).toString(); - } - else if (!tableContext.waitOnPrecursor()) { + } else if (!tableContext.waitOnPrecursor()) { return context.warehouse.getDefaultTablePath( parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString(); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 2feb5a84d876..514783ba0f22 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -1064,8 +1064,7 @@ private static void createReplImportTasks( // For an external table, we need to use the path specified in the source table but the // scheme, authority and root path of the target file system. 
tblDesc.setLocation(wh.getDnsPath(tblDesc.getTableLocationPath()).toString()); - } - else if (!waitOnPrecursor){ + } else if (!waitOnPrecursor){ tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString()); } else { tblDesc.setLocation( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java index da17454ad674..d16a557fba10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/TableSerializer.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.parse.repl.dump.io; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils; diff --git a/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out b/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out index 40b6ad724618..950b5e414b00 100644 --- a/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out +++ b/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out @@ -411,7 +411,7 @@ PREHOOK: Input: default@ext_t_r_imported POSTHOOK: query: show create table ext_t_r_imported POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@ext_t_r_imported -CREATE TABLE `ext_t_r_imported`( +CREATE EXTERNAL TABLE `ext_t_r_imported`( `emp_id` int COMMENT 'employee id') PARTITIONED BY ( `emp_country` string, @@ -425,7 +425,6 @@ OUTPUTFORMAT LOCATION #### A masked pattern was here #### TBLPROPERTIES ( - 'EXTERNAL'='FALSE', 'bucketing_version'='2', 'discover.partitions'='true', 'repl.last.id'='0', From b498851ea20959aa62edf949984d9ab3dbf8f242 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Wed, 14 Nov 2018 14:49:28 +0530 Subject: [PATCH 3/3] HIVE-20708: Address 
comments by Sankar and Mahesh. --- ...stReplicationScenariosAcrossInstances.java | 22 ++++++++++++++++--- .../hive/ql/parse/WarehouseInstance.java | 8 +++---- .../bootstrap/load/table/LoadPartitions.java | 6 ++++- .../repl/bootstrap/load/table/LoadTable.java | 6 ++--- .../hive/ql/parse/ImportSemanticAnalyzer.java | 4 ++-- .../hadoop/hive/ql/plan/ImportTableDesc.java | 10 +++------ 6 files changed, 34 insertions(+), 22 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index dbc5d56fc577..0772bf0f9bcf 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -1530,6 +1530,8 @@ public void testDumpExternalTableSetFalse() throws Throwable { public void testDumpExternalTableSetTrue() throws Throwable { String extTabName1 = "et1"; String extTabLoc1 = primary.createExternalLoc(primaryDbName + "_" + extTabName1); + String extPartTabName1 = "ept1"; + String extPartTabLoc1 = primary.createExternalLoc(primaryDbName + "_" + extPartTabName1); WarehouseInstance.Tuple tuple = primary .run("use " + primaryDbName) @@ -1542,7 +1544,14 @@ public void testDumpExternalTableSetTrue() throws Throwable { .run("insert into table t2 partition(country='france') values ('paris')") .run("create external table " + extTabName1 + " (id int) location '" + extTabLoc1 + "'") .run("insert into " + extTabName1 + " values (3)") + .run("create external table " + extPartTabName1 + " (place string)" + + " partitioned by (country string)" + + " location '" + extPartTabLoc1 + "'") + .run("insert into table " + extPartTabName1 + " partition(country='india') values ('bangalore')") + .run("insert into table " + extPartTabName1 + " 
partition(country='us') values ('austin')") + .run("insert into table " + extPartTabName1 + " partition(country='france') values ('paris')") .verifyExternalTable(primaryDbName, extTabName1, extTabLoc1) + .verifyExternalTable(primaryDbName, extPartTabName1, extPartTabLoc1) .dump("repl dump " + primaryDbName + " with ('hive.repl.include.external.tables'='true')"); replica.load(replicatedDbName, tuple.dumpLocation) @@ -1561,12 +1570,18 @@ public void testDumpExternalTableSetTrue() throws Throwable { .verifyResult("france") .run("select id from " + extTabName1) .verifyResult("3") + .run("select country from " + extPartTabName1 + " where country = 'us'") + .verifyResult("us") + .run("select country from " + extPartTabName1 + " where country = 'france'") + .verifyResult("france") .verifyExternalTable(replicatedDbName, "t1", primary.getTableLocation(primaryDbName, "t1")) .verifyExternalTable(replicatedDbName, "t2", primary.getTableLocation(primaryDbName, "t2")) .verifyExternalTable(replicatedDbName, extTabName1, - primary.getTableLocation(primaryDbName, extTabName1)); + primary.getTableLocation(primaryDbName, extTabName1)) + .verifyExternalTable(replicatedDbName, extPartTabName1, + primary.getTableLocation(primaryDbName, extPartTabName1)); String extTabName2 = "et2"; String extTabLoc2 = primary.createExternalLoc(primaryDbName + "_" + extTabName2); @@ -1612,8 +1627,9 @@ public void testDumpExternalTableSetTrue() throws Throwable { .run("select id from " + extTabName2 + " where id = 6") .verifyResult("6"); - primary.deleteExternalLoc(extTabLoc1); - primary.deleteExternalLoc(extTabLoc2); + primary.deleteLocation(extTabLoc1); + primary.deleteLocation(extTabLoc2); + primary.deleteLocation(extPartTabLoc1); } @Test diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index cbe60331bacb..12daa48b82b8 100644 --- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -197,16 +197,14 @@ public String createExternalLoc(String location) throws IOException, SemanticExc } /** - * Delete location pointed by the given path with respect to the external table location root on - * DFS. + * Delete location pointed by the given path on DFS. * @param location * @throws IOException * @throws SemanticException */ - public void deleteExternalLoc(String location) throws IOException, SemanticException { - String extLoc = extLocRoot + "/" + location; + public void deleteLocation(String location) throws IOException, SemanticException { DistributedFileSystem fs = miniDFSCluster.getFileSystem(); - fs.delete(new Path(extLoc), true); + fs.delete(new Path(location), true); } public HiveConf getConf() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index 172b4ac44629..ef1a545f5f18 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -103,7 +103,11 @@ public LoadPartitions(Context context, ReplLogger replLogger, TableContext table private String location() throws MetaException, HiveException { Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName()); - if (!tableContext.waitOnPrecursor()) { + if (tableDesc.isExternal()) { + // For an external table, we need to use the path specified in the source table but the + // scheme, authority and root path of the target file system. 
+ return context.warehouse.getDnsPath(tableDesc.getSourceTableLocationPath()).toString(); + } else if (!tableContext.waitOnPrecursor()) { return context.warehouse.getDefaultTablePath( parentDb, tableDesc.getTableName(), tableDesc.isExternal()).toString(); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index 33945749f61b..b816b0d41910 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -18,11 +18,9 @@ package org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.table; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ReplCopyTask; import org.apache.hadoop.hive.ql.exec.Task; @@ -209,10 +207,10 @@ private String location(ImportTableDesc tblDesc, Database parentDb) throws MetaException, SemanticException { - if (tblDesc.tableType().equals(TableType.EXTERNAL_TABLE)) { + if (tblDesc.isExternal()) { // For an external table, we need to use the path specified in the source table but the // scheme, authority and root path of the target file system. 
- return context.warehouse.getDnsPath(tblDesc.getTableLocationPath()).toString(); + return context.warehouse.getDnsPath(tblDesc.getSourceTableLocationPath()).toString(); } else if (!tableContext.waitOnPrecursor()) { return context.warehouse.getDefaultTablePath( parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 514783ba0f22..1fed0c117250 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -1060,10 +1060,10 @@ private static void createReplImportTasks( } if (tblDesc.getLocation() == null) { - if (tblDesc.tableType().equals(TableType.EXTERNAL_TABLE)) { + if (tblDesc.isExternal()) { // For an external table, we need to use the path specified in the source table but the // scheme, authority and root path of the target file system. 
- tblDesc.setLocation(wh.getDnsPath(tblDesc.getTableLocationPath()).toString()); + tblDesc.setLocation(wh.getDnsPath(tblDesc.getSourceTableLocationPath()).toString()); } else if (!waitOnPrecursor){ tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString()); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java index 9dc9aa739035..87faa2bff17a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java @@ -60,7 +60,8 @@ public ImportTableDesc(String dbName, Table table) throws Exception { case TABLE: this.createTblDesc = new CreateTableDesc(dbName, table.getTableName(), - false, // isExternal: set to false here, can be overwritten by the IMPORT stmt + // isExternal: can be overwritten by the IMPORT stmt + TableType.EXTERNAL_TABLE.equals(table.getTableType()), false, table.getSd().getCols(), table.getPartitionKeys(), @@ -342,11 +343,6 @@ public boolean isMaterializedView() { } public TableType tableType() { - if (isView()) { - return TableType.VIRTUAL_VIEW; - } else if (isMaterializedView()) { - return TableType.MATERIALIZED_VIEW; - } return table.getTableType(); } @@ -367,7 +363,7 @@ public void setReplWriteId(Long replWriteId) { } } - public Path getTableLocationPath() { + public Path getSourceTableLocationPath() { return table.getPath(); } }