-
Notifications
You must be signed in to change notification settings - Fork 4.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HIVE-24187: Handle _files creation for HA config with same nameservic… #1515
Changes from 1 commit
4158e13
4f80b69
dae23fb
432820d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,6 +77,7 @@ | |
import static org.junit.Assert.assertTrue; | ||
|
||
public class TestReplicationScenariosAcrossInstances extends BaseReplicationAcrossInstances { | ||
private static final String NS_REMOTE = "nsRemote"; | ||
@BeforeClass | ||
public static void classLevelSetup() throws Exception { | ||
HashMap<String, String> overrides = new HashMap<>(); | ||
|
@@ -1604,6 +1605,104 @@ public void testRangerReplication() throws Throwable { | |
.verifyResults(new String[] {"1", "2"}); | ||
} | ||
|
||
// Bootstrap case: the dump is taken with the nameservice-replacement clause from getHdfsNamespaceClause() | ||
// and loaded with the same clause. The load is expected to fail with an IllegalArgumentException whose | ||
// message names java.net.UnknownHostException for "nsRemote" (the NS_REMOTE value put into the clause). | ||
@Test | ||
public void testHdfsNamespaceLazyCopy() throws Throwable { | ||
List<String> clause = getHdfsNamespaceClause(); | ||
primary.run("use " + primaryDbName) | ||
.run("create table acid_table (key int, value int) partitioned by (load_date date) " + | ||
"clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')") | ||
.run("create table table1 (i String)") | ||
.run("insert into table1 values (1)") | ||
.run("insert into table1 values (2)") | ||
.dump(primaryDbName, clause); | ||
|
||
// Reaching Assert.fail means the load succeeded, i.e. the replaced nameservice was not used. | ||
try{ | ||
replica.load(replicatedDbName, primaryDbName, clause); | ||
Assert.fail("Expected the UnknownHostException to be thrown."); | ||
} catch (IllegalArgumentException ex) { | ||
assertTrue(ex.getMessage().contains("java.net.UnknownHostException: nsRemote")); | ||
} | ||
} | ||
|
||
// Two-cycle case: the first dump is taken WITHOUT the nameservice clause, so the first load succeeds | ||
// and its results are verified. The second dump is taken WITH the clause, and the following load is | ||
// expected to fail with an IllegalArgumentException mentioning UnknownHostException for "nsRemote". | ||
@Test | ||
public void testHdfsNamespaceLazyCopyIncr() throws Throwable { | ||
List<String> clause = getHdfsNamespaceClause(); | ||
primary.run("use " + primaryDbName) | ||
.run("create table acid_table (key int, value int) partitioned by (load_date date) " + | ||
"clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')") | ||
.run("create table table1 (i String)") | ||
.run("insert into table1 values (1)") | ||
.run("insert into table1 values (2)") | ||
.dump(primaryDbName); | ||
|
||
// First load: the dump above carried no nameservice clause, so both tables and both rows must arrive. | ||
replica.load(replicatedDbName, primaryDbName, clause) | ||
.run("use " + replicatedDbName) | ||
.run("show tables") | ||
.verifyResults(new String[] {"acid_table", "table1"}) | ||
.run("select * from table1") | ||
.verifyResults(new String[] {"1", "2"}); | ||
|
||
// Second cycle: new rows are dumped with the nameservice clause this time. | ||
primary.run("use " + primaryDbName) | ||
.run("insert into table1 values (3)") | ||
.run("insert into table1 values (4)") | ||
.dump(primaryDbName, clause); | ||
try{ | ||
replica.load(replicatedDbName, primaryDbName, clause); | ||
Assert.fail("Expected the UnknownHostException to be thrown."); | ||
} catch (IllegalArgumentException ex) { | ||
assertTrue(ex.getMessage().contains("java.net.UnknownHostException: nsRemote")); | ||
} | ||
} | ||
|
||
// Same nameservice clause as the lazy-copy tests, plus REPL_RUN_DATA_COPY_TASKS_ON_TARGET set to 'false'. | ||
// Both the first and the second dump/load cycle are expected to succeed and replicate all rows. | ||
@Test | ||
public void testHdfsNamespaceWithDataCopy() throws Throwable { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nameservice |
||
List<String> clause = getHdfsNamespaceClause(); | ||
// Nameservice replacement parameters have no effect when data is also copied to staging | ||
clause.add("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET + "'='false'"); | ||
primary.run("use " + primaryDbName) | ||
.run("create table acid_table (key int, value int) partitioned by (load_date date) " + | ||
"clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')") | ||
.run("create table table1 (i String)") | ||
.run("insert into table1 values (1)") | ||
.run("insert into table1 values (2)") | ||
.dump(primaryDbName, clause); | ||
replica.load(replicatedDbName, primaryDbName, clause) | ||
.run("use " + replicatedDbName) | ||
.run("show tables") | ||
.verifyResults(new String[] {"acid_table", "table1"}) | ||
.run("select * from table1") | ||
.verifyResults(new String[] {"1", "2"}); | ||
|
||
// Second cycle with the same clause: the two new rows must show up on the replica as well. | ||
primary.run("use " + primaryDbName) | ||
.run("insert into table1 values (3)") | ||
.run("insert into table1 values (4)") | ||
.dump(primaryDbName, clause); | ||
replica.load(replicatedDbName, primaryDbName, clause) | ||
.run("use " + replicatedDbName) | ||
.run("show tables") | ||
.verifyResults(new String[] {"acid_table", "table1"}) | ||
.run("select * from table1") | ||
.verifyResults(new String[] {"1", "2", "3", "4"}); | ||
} | ||
|
||
// Function case: a UDF jar is placed under the primary's functions root, a function using that jar is | ||
// created, and the database is dumped with the nameservice-replacement clause. Loading with the same | ||
// clause is expected to fail with an IllegalArgumentException mentioning UnknownHostException for "nsRemote". | ||
@Test | ||
public void testCreateFunctionWithHdfsNamespace() throws Throwable { | ||
Path identityUdfLocalPath = new Path("../../data/files/identity_udf.jar"); | ||
Path identityUdf1HdfsPath = new Path(primary.functionsRoot, "idFunc1" + File.separator + "identity_udf1.jar"); | ||
setupUDFJarOnHDFS(identityUdfLocalPath, identityUdf1HdfsPath); | ||
List<String> clause = getHdfsNamespaceClause(); | ||
primary.run("CREATE FUNCTION " + primaryDbName | ||
+ ".idFunc1 as 'IdentityStringUDF' " | ||
+ "using jar '" + identityUdf1HdfsPath.toString() + "'"); | ||
// The dump result is never inspected by this test, so it is not kept in a local variable. | ||
primary.dump(primaryDbName, clause); | ||
try { | ||
replica.load(replicatedDbName, primaryDbName, clause); | ||
Assert.fail("Expected the UnknownHostException to be thrown."); | ||
} catch (IllegalArgumentException ex) { | ||
assertTrue(ex.getMessage().contains("java.net.UnknownHostException: nsRemote")); | ||
} | ||
} | ||
|
||
@Test | ||
public void testRangerReplicationRetryExhausted() throws Throwable { | ||
List<String> clause = Arrays.asList("'" + HiveConf.ConfVars.REPL_INCLUDE_AUTHORIZATION_METADATA + "'='true'", | ||
|
@@ -1963,4 +2062,12 @@ private void setupUDFJarOnHDFS(Path identityUdfLocalPath, Path identityUdfHdfsPa | |
FileSystem fs = primary.miniDFSCluster.getFileSystem(); | ||
fs.copyFromLocalFile(identityUdfLocalPath, identityUdfHdfsPath); | ||
} | ||
|
||
// Builds the WITH-clause entries shared by the nameservice tests: one entry sets | ||
// REPL_HA_DATAPATH_REPLACE_REMOTE_NAMESERVICE to 'true', the other sets | ||
// REPL_HA_DATAPATH_REPLACE_REMOTE_NAMESERVICE_NAME to NS_REMOTE. | ||
// A new mutable ArrayList is returned on every call, so callers may append further entries. | ||
private List<String> getHdfsNamespaceClause() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. replace with nameservice |
||
List<String> withClause = new ArrayList<>(); | ||
withClause.add("'" + HiveConf.ConfVars.REPL_HA_DATAPATH_REPLACE_REMOTE_NAMESERVICE.varname + "'='true'"); | ||
withClause.add("'" + HiveConf.ConfVars.REPL_HA_DATAPATH_REPLACE_REMOTE_NAMESERVICE_NAME.varname + "'='" | ||
+ NS_REMOTE + "'"); | ||
return withClause; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,9 +23,11 @@ | |
import org.apache.hadoop.hive.common.ValidTxnList; | ||
import org.apache.hadoop.hive.common.repl.ReplScope; | ||
import org.apache.hadoop.hive.conf.HiveConf; | ||
import org.apache.hadoop.hive.metastore.ReplChangeManager; | ||
import org.apache.hadoop.hive.metastore.TableType; | ||
import org.apache.hadoop.hive.metastore.api.Database; | ||
import org.apache.hadoop.hive.metastore.api.NotificationEvent; | ||
import org.apache.hadoop.hive.metastore.utils.StringUtils; | ||
import org.apache.hadoop.hive.ql.ErrorMsg; | ||
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; | ||
import org.apache.hadoop.hive.ql.exec.Utilities; | ||
|
@@ -49,6 +51,8 @@ | |
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.PrintWriter; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
|
@@ -72,6 +76,40 @@ public static void writeOutput(List<List<String>> listValues, Path outputFile, H | |
writeOutput(listValues, outputFile, hiveConf, false); | ||
} | ||
|
||
/** | ||
* Given a ReplChangeManager's encoded uri, replaces the host (nameservice) of both the data path and the | ||
* repl cm root path with the configured remote nameservice name and returns the modified encoded uri. | ||
* | ||
* @param cmEncodedURI uri in the encoded form understood by ReplChangeManager.decodeFileUri | ||
* @param hiveConf configuration read for REPL_HA_DATAPATH_REPLACE_REMOTE_NAMESERVICE_NAME | ||
* @return the re-encoded uri; the checksum and sub directory parts are carried over unchanged | ||
* @throws SemanticException if the nameservice name is missing or empty, or a rewritten uri is not valid | ||
*/ | ||
public static String replaceNameSpaceInEncodedURI(String cmEncodedURI, HiveConf hiveConf) throws SemanticException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. replace name service? |
||
String newNS = hiveConf.get(HiveConf.ConfVars.REPL_HA_DATAPATH_REPLACE_REMOTE_NAMESERVICE_NAME.varname); | ||
if (StringUtils.isEmpty(newNS)) { | ||
// The ternary must be parenthesized: '+' binds tighter than '==', so without the parentheses the | ||
// whole concatenation is compared with null and only newNS (null or empty) reaches format(). | ||
throw new SemanticException(ErrorMsg.REPL_INVALID_CONFIG_FOR_SERVICE | ||
.format("Configuration 'hive.repl.ha.datapath.replace.remote.nameservice.name' is not valid " | ||
+ (newNS == null ? "null" : newNS), ReplUtils.REPL_HIVE_SERVICE)); | ||
} | ||
String[] decodedURISplits = ReplChangeManager.decodeFileUri(cmEncodedURI); | ||
// replace both data path and repl cm root path and construct new URI. Checksum and subDir will be same as old. | ||
String modifiedURI = ReplChangeManager.encodeFileUri(replaceHost(decodedURISplits[0], newNS), decodedURISplits[1], | ||
replaceHost(decodedURISplits[2], newNS), decodedURISplits[3]); | ||
LOG.debug("Modified encoded uri {}, to {} ", cmEncodedURI, modifiedURI); | ||
return modifiedURI; | ||
} | ||
|
||
/** | ||
* Returns the given uri string with its host component swapped for newHost. Scheme, user info, port, | ||
* path, query and fragment are taken over from the original uri. An empty input is returned as is. | ||
* | ||
* @throws SemanticException if the rebuilt uri is rejected with a URISyntaxException | ||
*/ | ||
private static String replaceHost(String originalURIStr, String newHost) throws SemanticException { | ||
if (StringUtils.isEmpty(originalURIStr)) { | ||
return originalURIStr; | ||
} | ||
final URI source = URI.create(originalURIStr); | ||
final URI rewritten; | ||
try { | ||
rewritten = new URI(source.getScheme(), source.getUserInfo(), newHost, source.getPort(), | ||
source.getPath(), source.getQuery(), source.getFragment()); | ||
} catch (URISyntaxException ex) { | ||
throw new SemanticException(ex); | ||
} | ||
return rewritten.toString(); | ||
} | ||
|
||
|
||
public static void writeOutput(List<List<String>> listValues, Path outputFile, HiveConf hiveConf, boolean update) | ||
throws SemanticException { | ||
Retryable retryable = Retryable.builder() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -424,6 +424,20 @@ public String encodeFileUri(String fileUriStr, String fileChecksum, String encod | |
return encodedUri; | ||
} | ||
|
||
// Static variant that takes the cm root explicitly. The result is the file uri followed by three | ||
// URI_FRAGMENT_SEPARATOR-delimited fields: checksum, cmroot and sub directory. | ||
// Checksum and cmroot are written only when both are non-null; a null encodedSubDir is written as "". | ||
public static String encodeFileUri(String fileUriStr, String fileChecksum, String cmroot, String encodedSubDir) { | ||
String encodedUri = fileUriStr; | ||
if ((fileChecksum != null) && (cmroot != null)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. empty check not needed? |
||
encodedUri = encodedUri + URI_FRAGMENT_SEPARATOR + fileChecksum + URI_FRAGMENT_SEPARATOR + cmroot; | ||
} else { | ||
// Two separators keep the same number of fields as the branch above, with checksum and cmroot left empty. | ||
encodedUri = encodedUri + URI_FRAGMENT_SEPARATOR + URI_FRAGMENT_SEPARATOR; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we have 2 URI_FRAGMENT_SEPARATOR |
||
} | ||
encodedUri = encodedUri + URI_FRAGMENT_SEPARATOR + ((encodedSubDir != null) ? encodedSubDir : ""); | ||
// The message is built by string concatenation, so the guard skips that work when debug logging is off. | ||
if (LOG.isDebugEnabled()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this check? |
||
LOG.debug("Encoded URI: " + encodedUri); | ||
} | ||
return encodedUri; | ||
} | ||
|
||
/*** | ||
* Split uri with fragment into file uri, subdirs, checksum and source cmroot uri. | ||
* Currently using fileuri#checksum#cmrooturi#subdirs as the format. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sentence is incomplete